diff --git a/Dockerfile b/Dockerfile index 32f8b4a..3d4a846 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,6 +13,7 @@ RUN npm run check:prettier RUN npm run build RUN npm run test RUN npm run scan_test_pdfs +RUN ./scripts/check_update_all_generated_DBs.sh RUN [ -f "edit_and_run.sh" ] && cat edit_and_run.sh && exit 1 || echo "Build DB succeeded" WORKDIR /jendeley/jendeley-frontend diff --git a/jendeley-backend/.npmignore b/jendeley-backend/.npmignore index 7d71e45..daeb82b 100644 --- a/jendeley-backend/.npmignore +++ b/jendeley-backend/.npmignore @@ -18,3 +18,4 @@ benchmark.svg perf.data* processed-isolate-*.txt processed-isolate-*.txt +generated_DBs diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.0.json new file mode 100644 index 0000000..5a963b8 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.0.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.0" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.1.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.1.json new file mode 100644 index 0000000..6fa1618 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.1.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.1" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.10.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.10.json new file mode 100644 index 0000000..8506e50 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.10.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.10" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.2.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.2.json new file mode 100644 index 0000000..4c95503 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.2.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.2" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.3.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.3.json new file mode 100644 index 0000000..56f05bc --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.3.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.3" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.4.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.4.json new file mode 100644 index 0000000..9f39c1a --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.4.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.4" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.5.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.5.json new file mode 100644 index 0000000..fc01bf3 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.5.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.5" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.6.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.6.json new file mode 100644 index 0000000..6fc0693 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.6.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.6" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.7.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.7.json new file mode 100644 index 0000000..e5bbeb4 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.7.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.7" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.8.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.8.json new file mode 100644 index 0000000..18ca0be --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.8.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.8" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.0.9.json b/jendeley-backend/generated_DBs/jendeley_db_1.0.9.json new file mode 100644 index 0000000..0255229 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.0.9.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.0.9" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.1.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.1.0.json new file mode 100644 index 0000000..53cc794 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.1.0.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.1.0" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.1.1.json b/jendeley-backend/generated_DBs/jendeley_db_1.1.1.json new file mode 100644 index 0000000..7810285 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.1.1.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.1.1" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.2.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.2.0.json new file mode 100644 index 0000000..72534a6 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.2.0.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.2.0" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_1.3.0.json b/jendeley-backend/generated_DBs/jendeley_db_1.3.0.json new file mode 100644 index 0000000..130fce7 --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_1.3.0.json @@ -0,0 +1,1555 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "1.3.0" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "comments": "" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 31 + ] + ], + "date-time": "2024-01-31T16:34:41Z", + "timestamp": 1706718881300 + }, + "reference-count": 31, + "publisher": "Elsevier BV", + "issue": "2", + "license": [ + { + "start": { + "date-parts": [ + [ + 1997, + 2, + 1 + ] + ], + "date-time": "1997-02-01T00:00:00Z", + "timestamp": 854755200000 + }, + "content-version": "tdm", + "delay-in-days": 0, + "URL": "https://www.elsevier.com/tdm/userlicense/1.0/" + }, + { + "start": { + "date-parts": [ + [ + 2013, + 7, + 17 + ] + ], + "date-time": "2013-07-17T00:00:00Z", + "timestamp": 1374019200000 + }, + "content-version": "vor", + "delay-in-days": 6010, + "URL": "https://www.elsevier.com/open-access/userlicense/1.0/" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "DOI": "10.1006/inco.1996.2613", + "type": "journal-article", + "created": { + "date-parts": [ + [ + 2002, + 10, + 6 + ] + ], + "date-time": "2002-10-06T17:10:40Z", + "timestamp": 1033924240000 + }, + "page": "109-176", + "source": "Crossref", + "is-referenced-by-count": 384, + "title": "Region-Based Memory Management", + "prefix": "10.1006", + "volume": "132", + "author": [ + { + "given": "Mads", + "family": "Tofte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "Jean-Pierre", + "family": "Talpin", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "78", + "reference": [ + { + "key": "10.1006/inco.1996.2613_IC962613RF1", + "doi-asserted-by": "crossref", + "unstructured": "A. Aiken, M. Fähndrich, R. Levein, Better static memory management: Improving region-based analysis of higher-order languages, Proceedings of the ACM SIGPLAN '95 Conference on Programming Languages and Implementation (PLDI), La Jolla, CA, June 1995, ACM Press", + "DOI": "10.1145/207110.207137" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF2", + "series-title": "Compiling with Continuations", + "author": "Appel", + "year": "1992" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF3", + "doi-asserted-by": "crossref", + "first-page": "280", + "DOI": "10.1145/359460.359470", + "article-title": "List processing in real time on a serial computer", + "volume": "21", + "author": "Baker", + "year": "1978", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF4", + "doi-asserted-by": "crossref", + "unstructured": "H. G. Baker, Unify and conquer (garbage collection, updating, aliasing, …) in functional languages, Proceedings of the 1990 ACM Conference on Lisp and Functional Programming, June 1990,", + "DOI": "10.1145/91556.91652" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF5", + "doi-asserted-by": "crossref", + "unstructured": "L. Birkedal, M. Tofte, M. Vejlstrup, 1996, From region inference to von Neumann machines via region representation inference, Proceedings of the 23rd ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, ACM Press", + "DOI": "10.1145/237721.237771" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF6", + "unstructured": "J. M. L. D. K. Gifford, P. Jouvelot, M. Sheldon, 1987, Fx-87 Reference Manual, MIT Laboratory for Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF7", + "series-title": "Proceedings, 9th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Principal type schemes for functional programs", + "author": "Damas", + "year": "1982" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8A", + "doi-asserted-by": "crossref", + "first-page": "312", + "DOI": "10.1007/BF01386232", + "article-title": "Recursive programming", + "volume": "2", + "author": "Dijkstra", + "year": "1960", + "journal-title": "Numer. Math" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF8B", + "series-title": "Programming Systems and Languages", + "author": "Rosen", + "year": "1967" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF9", + "series-title": "An Optimizing Backend for the ML Kit Using a Stack of Regions", + "author": "Elsman", + "year": "1995" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10", + "doi-asserted-by": "crossref", + "first-page": "603", + "DOI": "10.1145/1780.1803", + "article-title": "Transformations and reduction strategies for typed lambda expressions", + "volume": "6", + "author": "Georgeff", + "year": "1984", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF10A", + "series-title": "A region profiler for a standard ML compiler based on region inference", + "author": "Hallenberg", + "year": "1996" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF11", + "doi-asserted-by": "crossref", + "unstructured": "P. Hudak, A semantic model of reference counting and its abstraction, ACM Symposium on List and Functional Programming, 1986", + "DOI": "10.1145/319838.319876" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF12", + "doi-asserted-by": "crossref", + "unstructured": "P. Jouvelot, D. Gifford, Algebraic reconstruction of types and effects, Proceedings of the 18th ACM Symposium on Principles of Programming Languages (POPL), 1991.", + "DOI": "10.1145/99583.99623" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF13", + "doi-asserted-by": "crossref", + "first-page": "555", + "DOI": "10.1145/48022.48025", + "article-title": "Analysis of functional programs to detect run-time garbage cells", + "volume": "10", + "author": "Katsuro Inoue", + "year": "1988", + "journal-title": "ACM Trans. Programming Languages Systems" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF14", + "series-title": "Fundamental Algorithms", + "volume": "Vol. 1", + "author": "Knuth", + "year": "1972" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF15", + "doi-asserted-by": "crossref", + "first-page": "419", + "DOI": "10.1145/358141.358147", + "article-title": "A real-time garbage collector based on the lifetimes of objects", + "volume": "26", + "author": "Lieberman", + "year": "1983", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF16", + "doi-asserted-by": "crossref", + "unstructured": "J. Lucassen, D. Gifford, Polymorphic effect systems, Proceedings of the 1988 ACM Conference on Principle of Programming Languages, 1988", + "DOI": "10.1145/73560.73564" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF17", + "unstructured": "J. M. Lucassen, 1987, Types and Effects, towards the Integration of Functional and Imperative Programming, MIT Laboratory for Computer Science; MIT/LCS/TR-408" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF18", + "doi-asserted-by": "crossref", + "first-page": "348", + "DOI": "10.1016/0022-0000(78)90014-4", + "article-title": "A theory of type polymorphism in programming", + "volume": "17", + "author": "Milner", + "year": "1978", + "journal-title": "J. Comput. System Sci." + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF19", + "series-title": "The Definition of Standard ML", + "author": "Milner", + "year": "1990" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF20", + "doi-asserted-by": "crossref", + "DOI": "10.1007/3-540-12925-1_41", + "article-title": "Polymorphic type schemes and recursive definitions", + "volume": "Vol. 167", + "author": "Mycroft", + "year": "1984", + "journal-title": "Lecture Notes in Computer Science" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21", + "first-page": "1", + "article-title": "Revised report on the algorithmic language Algol 60", + "volume": "1", + "author": "Naur", + "year": "1963", + "journal-title": "Comm. ACM" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF21A", + "doi-asserted-by": "crossref", + "unstructured": "H. R. Nielson, F. Nielson, Jan. 1994, Higher-order concurrent programs with finite communication topology, Conference Record of POPL'94: 21 st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages, Assoc. Comput. Mach. Press", + "DOI": "10.1145/174675.174538" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF22", + "series-title": "Proceedings of the 15th Annual ACM Symposium on Principles of Programming Languages", + "article-title": "Lifetime analysis of dynamically allocated objects", + "author": "Ruggieri", + "year": "1988" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23A", + "series-title": "Theoretical and Practical Aspects of Type and Effect Inference", + "author": "Talpin", + "year": "1993" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF23B", + "unstructured": "Ecole des Mines de Paris" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF24", + "doi-asserted-by": "crossref", + "DOI": "10.1017/S0956796800000393", + "article-title": "Polymorphic type, region and effect inference", + "volume": "2", + "author": "Talpin", + "year": "1992", + "journal-title": "J. Funct. Programming" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF25", + "unstructured": "M. Tofte, J.-P. Talpin, 1993, A Theory of Stack Allocation in Polymorphically Typed Languages, Department of Computer Science, University of Copenhagen" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF26", + "series-title": "Proceedings of the 21st ACM SIGPLAN–SIGACT Symposium on Principles of Programming Languages", + "article-title": "Implementing the call-by-value lambda-calculus using a stack of regions", + "author": "Tofte", + "year": "1994" + }, + { + "key": "10.1006/inco.1996.2613_IC962613RF27", + "doi-asserted-by": "crossref", + "unstructured": "D. N. Turner, P. Wadler, C. Mossin, June 1995, Once upon a type, Conference Record of FPCA'95, SIGPLAN–SIGARCH–WG2.8 Conference on Functional Programming Languages and Computer Architecture, Assoc. Comput. Mach. Press", + "DOI": "10.1145/224164.224168" + } + ], + "container-title": "Information and Computation", + "original-title": [], + "language": "en", + "link": [ + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/xml", + "content-type": "text/xml", + "content-version": "vor", + "intended-application": "text-mining" + }, + { + "URL": "https://api.elsevier.com/content/article/PII:S0890540196926139?httpAccept=text/plain", + "content-type": "text/plain", + "content-version": "vor", + "intended-application": "text-mining" + } + ], + "deposited": { + "date-parts": [ + [ + 2019, + 12, + 17 + ] + ], + "date-time": "2019-12-17T03:20:37Z", + "timestamp": 1576552837000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S0890540196926139" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + }, + "references-count": 31, + "journal-issue": { + "issue": "2", + "published-print": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + }, + "alternative-id": [ + "S0890540196926139" + ], + "URL": "http://dx.doi.org/10.1006/inco.1996.2613", + "relation": {}, + "ISSN": [ + "0890-5401" + ], + "subject": [ + "Computational Theory and Mathematics", + "Computer Science Applications", + "Information Systems", + "Theoretical Computer Science" + ], + "container-title-short": "Information and Computation", + "published": { + "date-parts": [ + [ + 1997, + 2 + ] + ] + } + } + }, + "arxiv_1512.03385": { + "path": [ + "resnet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2002.09002v2", + "updated": "2020-06-11T06:31:16Z", + "published": "2020-02-20T20:28:08Z", + "title": "RustHorn: CHC-based Verification for Rust Programs (full version)", + "summary": " Reduction to the satisfiability problem for constrained Horn clauses (CHCs)\nis a widely studied approach to automated program verification. The current\nCHC-based methods for pointer-manipulating programs, however, are not very\nscalable. This paper proposes a novel translation of pointer-manipulating Rust\nprograms into CHCs, which clears away pointers and memories by leveraging\nownership. We formalize the translation for a simplified core of Rust and prove\nits correctness. We have implemented a prototype verifier for a subset of Rust\nand confirmed the effectiveness of our method.\n", + "author": [ + { + "name": "Yusuke Matsushita" + }, + { + "name": "Takeshi Tsukada" + }, + { + "name": "Naoki Kobayashi" + } + ], + "arxiv:doi": { + "_": "10.1007/978-3-030-44914-8_18", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "title": "doi", + "href": "http://dx.doi.org/10.1007/978-3-030-44914-8_18", + "rel": "related" + } + }, + { + "$": { + "href": "http://arxiv.org/abs/2002.09002v2", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2002.09002v2", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:comment": { + "_": "Full version of the same-titled paper in ESOP2020", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "book_0262162091_ch01.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch01.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch01", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_ch02.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "ch02.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_ch02", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + }, + "book_0262162091_title.pdf": { + "idType": "book", + "path": [ + "dummyTapl", + "title.pdf" + ], + "tags": [], + "comments": "", + "userSpecifiedTitle": "Types and Programming Languages_title", + "dataFromNodeIsbn": { + "title": "Types and Programming Languages", + "authors": [ + "Benjamin C. Pierce" + ], + "publisher": "MIT Press", + "publishedDate": "2002-01-04", + "description": "A comprehensive introduction to type systems and programming languages. A type system is a syntactic method for automatically checking the absence of certain erroneous behaviors by classifying program phrases according to the kinds of values they compute. The study of type systems—and of programming languages from a type-theoretic perspective—has important applications in software engineering, language design, high-performance compilers, and security. This text provides a comprehensive introduction both to type systems in computer science and to the basic theory of programming languages. The approach is pragmatic and operational; each new concept is motivated by programming examples and the more theoretical sections are driven by the needs of implementations. Each chapter is accompanied by numerous exercises and solutions, as well as a running implementation, available via the Web. Dependencies between chapters are explicitly identified, allowing readers to choose a variety of paths through the material. The core topics include the untyped lambda-calculus, simple type systems, type reconstruction, universal and existential polymorphism, subtyping, bounded quantification, recursive types, kinds, and type operators. Extended case studies develop a variety of approaches to modeling the features of object-oriented languages.", + "industryIdentifiers": [ + { + "type": "ISBN_13", + "identifier": "9780262162098" + }, + { + "type": "ISBN_10", + "identifier": "0262162091" + } + ], + "readingModes": { + "text": false, + "image": true + }, + "pageCount": 646, + "printType": "BOOK", + "categories": [ + "Computers" + ], + "maturityRating": "NOT_MATURE", + "allowAnonLogging": false, + "contentVersion": "preview-1.0.0", + "panelizationSummary": { + "containsEpubBubbles": false, + "containsImageBubbles": false + }, + "imageLinks": { + "smallThumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=5&edge=curl&source=gbs_api", + "thumbnail": "http://books.google.com/books/content?id=ULT4DwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api" + }, + "language": "en", + "previewLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&printsec=frontcover&dq=isbn:0262162091&hl=&cd=1&source=gbs_api", + "infoLink": "http://books.google.co.jp/books?id=ULT4DwAAQBAJ&dq=isbn:0262162091&hl=&source=gbs_api", + "canonicalVolumeLink": "https://books.google.com/books/about/Types_and_Programming_Languages.html?hl=&id=ULT4DwAAQBAJ" + } + } +} \ No newline at end of file diff --git a/jendeley-backend/generated_DBs/jendeley_db_2.0.10.json b/jendeley-backend/generated_DBs/jendeley_db_2.0.10.json new file mode 100644 index 0000000..0b6329c --- /dev/null +++ b/jendeley-backend/generated_DBs/jendeley_db_2.0.10.json @@ -0,0 +1,1567 @@ +{ + "jendeley_meta": { + "idType": "meta", + "version": "2.0.10" + }, + "arxiv_2212.12976": { + "path": [ + "Modular Formal Verification of Rust Programs with Unsafe Blocks [jendeley download 1673165594267].pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nKatholieke\nUniversiteit\nLeuven\nDepartment of\nComputer Science\nMODULAR FORMAL VERIFICATION OF RUST\nPROGRAMS WITH UNSAFE BLOCKS\nTechnical Report\nNima Rahimi Foroushaani\nBart Jacobs\nimec-DistriNet Research Group, KU Leuven, Belgium\n{nima.rahimiforoushaani, bart.jacobs}@kuleuven.be\nDec. 2022\narXiv:2212.12976v1 [cs.LO] 26 Dec 2022\n\nAbstract\nRustis a modern systems programming language whose type system guarantees memory safety. For\nthe sake of expressivity and performance it allows programmers to relax typing rules temporarily, using\nunsafecode blocks. However, inunsafeblocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert programmers make mistakes and\na memory safety bug in anunsafeblock renders all the type system guarantees void. To address this\nproblem we are trying to verify soundness of Rustunsafecode applying ourModular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made so far.\nContents\n1 Introduction2\n2 Unsafe Code and Safe Abstractions3\n2.1 Safe Abstractions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .3\n2.2 Unsound Unsafe . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .5\n3 Modular Symbolic Execution (MSE)6\n3.1 Concrete Execution . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .6\n3.2 Symbolic Execution . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .9\n3.3 Modular Symbolic Execution . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .9\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions . . . . . . . . . . . . . . . . . . .11\n4 RustBelt12\n4.1 RustBelt’s semantic model and MSE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .12\n5 Implementation15\n5.1 Executing MIR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .16\n5.2 Executing MIR in VeriFast . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .16\n5.3 Added value with respect to RustBelt . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .16\n6 Future Plans17\n6.1 Rigorous Soundness . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .18\n6.2 Panic Safety and Stack Unwinding . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .18\n7 Conclusion18\nbibliography20\nA Intended encoding of the RustBelt’s semantic model in VeriFast20\n1\n\n1 Introduction\nRust is a relatively new programming language that provides memory safety without sacrificing performance\nand practicality, all the while being suited for systems programming as well. To achieve these all together has\nturned out not to be that easy, however. Other common programming languages usually trade these goals, one\nto another. Fortunately, Rust’s safety claims have been proven to be legitimate. The formal works,RustBelt\n[8],RustHorn[11], andOxide[13] have proven the safety of formal languages, designed to capture the central\ncharacteristics of Rust. At the same time, Rust has proven it is not just a research language. It has found\nits way to the wild and in fact, is getting ever more popular. It shows the language is simple enough for\ndevelopers and industry to use. So, it is completely fair to say Rust shows great promises. The main reason\nbehind this success is the language type system. Rust’s type system leveragesownershipandborrowingto rule\nout the possibility of simultaneousmutationandaliasing. In this way it prevents many common mistakes,\ndevelopers commit regarding memory safety. The type system also makes Rust needless of a garbage collector\nwhich makes it suitable for embedded systems and systems programming.\nIt seems everything about Rust is perfect and it addresses all of the problems. But, does it? In the end,\nmutation and aliasing together are crucial whenever communication between threads is required, e.g.Mutexes.\nPrograms that do reference counting, also need simultaneous mutation and aliasing. To provide a complete\nset of expected functionalities from a modern programming language and performance improvements, Rust\nintroducesunsafecode blocks. The type checker checks these blocks with some relaxations to allow the\nimplementation of such functionalities. The cost of these relaxations is that programmers themselves should\nmake sure theunsafeblocks do not cause the program to exhibitundefined behaviour(UB). Developers\nabstractunsafeblocks behindsafe abstractions to prevent them from propagating through the codebase and\nto make them easier to inspect and reuse. It is effective but not enough. There have been memory safety bugs\nfound in many Rust libraries [4], including even the standard library [7] indicating keepingunsafeblocks safe,\nis not that straightforward.\nBefore going further, some terminology agreements are necessary. In this text,unsafecode refers to code\nenclosed in anunsafeblock. It doesnotmean there is necessarily something wrong with the code and does\nnotmean the code’s behaviour is necessarily undefined. FollowingThe Rustonomicon[12], to refer tounsafe\ncode that shows UB, we useunsoundunsafecode in contrast tosoundunsafecode which we know for sure\nwould not exhibit UB.\nTo address the problem of unsoundunsafecode in Rust’s ecosystem we plan to verify the safety of Rust\nprograms withunsafecode usingVeriFast’s [1]Modular Symbolic Execution(MSE) algorithm. VeriFast is\na research tool for verifying the safety and functional properties of C and Java programs using MSE. We\napply the MSE algorithm with the assumption that the input code has already passed Rust’s type and borrow\nchecker. The outcome of this verification algorithm for a program would be finding potential problems or to\nguarantee that despite havingunsafecode, the program does not exhibit UB. That is, no execution of the\nprogram accesses unallocated memory or contains data races. We represent and use the information needed\nfor reasoning about program safety during the MSE in the form of formulas of a dialect of Separation Logic.\nSeparation Logic is a logic developed specifically for reasoning about pointer-manipulating computer programs.\nWe get this required information from two main sources.\n1. we translate Rust’s rich type system’s meaning into Separation Logic formulae. For the meaning of\nRust’s types we are relying on the semantics provided by RustBelt. Encoding RustBelt’s semantics to\nmake it usable by VeriFast for verifying Rust programs is the novel aspect of this envisaged approach.\n2. we use the user-provided information in form of Separation Logic formulae annotated in the program\ncode. The user can also guide the MSE algorithm with lemmas and ghost commands to verify more\nprograms.\nTo evaluate our approach, we are extending VeriFast to support Rust programs. We use VeriFast’s backend as\nthe underlying engine for MSE and reasoning about Separation Logic formulae. It is worth noting, VeriFast\nuses its own dialect of Separation Logic.\nIn the rest of this text, in Section 2 we take a tour ofunsafeRust code, safe abstractions, and their\npotential unsoundness. Next, in Section 3 we explain the MSE algorithm for verifying the soundness of Rust\nprograms withunsafeblocks. In Section 4 we give a brief introduction to RustBelt, its semantic model, and\nits approach to proving the soundness of safe abstractions. We also talk about the reasons we chose to use\nRustBelt’s semantics and we show how we are going to use its semantic model in the MSE algorithm. Next,\nin Section 5 we report the progress that has been made so far to implement the suggested MSE algorithm\nand we discuss why our approach provides added values with respect to RustBelt. In Section 6 we explain\n2\n\nthe possibilities we envisage to contribute more to the safety of Rust ecosystem in the future. Eventually, we\nconclude in Section 7.\n2 Unsafe Code and Safe Abstractions\nTo guarantee memory safety Rust types representownership. Listing 1 shows these different types of ownership\nof a vector. The most basic type of ownership isexclusive ownership. Owner variables, e.g.v, have exclusive\nownership. An active owner variable rules out aliasing entirely. The value is represented in the whole program\njust by its owner and gets dropped when the owner is out of scope. We can transfer the ownership to other\nfunctions/threads. But it is still not that expressive. To regain ownership after passing it to a function, we\nshould return it back which is very inconvenient in most cases. To solve this issue, instead of moving exclusive\nownership we canborrowittemporarily. A mutable reference grantstemporary exclusive ownership. In our\nexample,mrvgives us write access. We mutate the vector inside functionpush_fourthrough the passed\nmutable reference,mrv. Oncemrvis out of scope, the ownervgets its exclusive ownership back again. Owners\nand mutable references, representing exclusive ownership, rule out aliasing. However, aliasing is needed to give\naccess to multiple threads to the same memory location. To represent a part of memory and sub-parts of it\nat the same time is also very common and handy in programming. Shared references are the Rust’s answer\nfor aliasing. Notice that we have a shared referencesrvto vectorvand a shared referencefirstto its first\nelement at the same time. To preserve memory safety shared references rule out mutation.\nAll of the references in Rust have alifetimein their type.'lin the type&'l mut i32is a lifetime.\nLifetimes represent a continuous range of program execution steps. Type system’s guarantees about references\nhold, as long as their lifetime is alive. Look at the signature of the functionpush_four. It has a lifetime\nparameter<'a>which is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|kwhich is used in the type of parameterr, i.e.&'a mut Vec. Lifetime parameters are\nthe way callees get informed about the aliveness of a lifetime in the caller. They are “another kind of generics”\n[10], in the sense that they are not run-time variables. They get instantiated at compile-time, i.e. when we\ncall a function with a lifetime parameter, the compiler tries to find a suitable lifetime instantiation for the\nlifetime parameter. In our example, the lifetime thatmrvhas in its type, has been annotated using comments\nin the code,l1. It is a suitable lifetime for instantiatingpush_four’s lifetime parameter. One implicit type\nsystem’s guarantee about lifetime parameters is that they alloutlivethe function’s body lifetime.\nRust’s type system rules out simultaneous mutation and aliasing using the ownership and borrowing rules.\nHowever, communication between threads needs mutation and aliasing together. As an example consider\naMutex. We need to have references to it in different threads, aliasing, and we need to lock it in those\nthreads, mutation. To have mutation and aliasing of a memory location in a program simultaneously is against\nRust’s type system rules. Moreover, the safety checks to maintain the type system’s guarantees are necessarily\nconservative and valid programs that do not pass these checks are not that few. To address expressivity besides\nsafety Rust introducesunsafecode, i.e. code blocks annotated with theunsafekeyword. The methodsetin\nListing 2 is an example of using anunsafecode block.unsafecode still gets checked by the type and borrow\nchecker, but with some relaxation. The The Rust Programming Language [10] book mentions five actions\nyou can take just inunsafecode and calls themunsafe superpowers. Three of these unsafe superpowers are\ninherently unsafe primitive constructs and two of them are just indicating there are some otherunsafeparts\ninside.\nIn this project, among primitive unsafe constructs, we will initially focus on supportingunsafecode\ninvolvingdereferencing raw pointers. The two others are used relatively rarely. Raw pointers are similar to C\npointers. Rust’s borrow checker does not track them and they can be null or dangling. Their types are of the\nform*const Tor*mut Tfor arbitrary pointee typeT.\nAmong the two non-primitive superpowers, we are interested incall anunsafefunction/method. Anunsafe\nfunction or method’s signature is annotated withunsafekeyword, e.g.unsafe fn function() {...}. The\nkeywordunsafein the function’s signature intuitively means calling this function has requirements that the\ntype system cannot check and it is up to the programmer to make sure they have been met. Anunsafe\nfunction’s body is anunsafecode block. Usingunsafefunctions propagates theunsafecode to the callers.\n2.1 Safe Abstractions\nIf we usedunsafesuperpowers to implement a functionality we can expose the unsafety to the user code by\nmarking our functions asunsafe. But it should stop at some point. Otherwise, theunsafecode propagates\nall over the codebase and we would not get much benefit from Rust’s type system. It puts the burden of safety\nchecks on the programmer’s shoulders and is in contradiction with type safety. It is much better to abstract\n3\n\npub fn push_four<'a>(r: &'a mut Vec) {\nr.push(4)\n}\n/*** [l1] means the lifetime l1 */\npub fn access_types() {\nlet mut v: Vec = vec![1, 2, 3];// v is the owner\n{//----------------------------------------------------\nlet mrv: &mut Vec = &mut v;// |\n/*** |\n* mrv is a mutable borrow of v |\n* as long as this borrow is alive it [l1]\n* is not possible to access |\n* the vector through v |\n*/ // |\npush_four(mrv);// mutable borrow has full access |\n}//----------------------------------------------------\nlet _ = v.pop();// v has its ownership back\n{//----------------------------------------------------\nlet srv: &Vec = &v;// |\n/*** |\n* srv is a shared/immutable borrow of v |\n* the vector cannot get mutated as long as |\n* it is borrowed by any immutable borrow |\n*/ // |\n{//---------------------------------------- |\nlet first: &i32 =// | |\nv.first().unwrap();// | |\n/*** | [l2]\n* multiple shared references, | |\n* borrowing from the same owner, | |\n* can coexist [l3] |\n*/ // | |\nprintln!(\"{} is the first in {:?}\",//| |\nfirst, srv);// | |\n}//---------------------------------------- |\n}//----------------------------------------------------\nlet _ = v.pop();\n/***\n* The owner v goes out of scope here\n* and the value gets dropped\n*/\n}\nListing 1: Different types of memory ownership in Rust’s types\n4\n\npub struct Cell {\nvalue: i32,\n}\nimpl Cell {\npub fn new(value: i32) -> Cell {\nCell { value }\n}\npub fn get<'a>(&'a self) -> i32 {\nself.value\n}\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\n}\nimpl !Sync for Cell {}\nListing 2: A simplified version ofstd::cell::Cell\ntheunsafeparts in a safe function. Such a function would be asafe abstraction. Then it can be called in safe\nRust and the type system checks whether the caller meets the requirements the function type represents. In\ncase of safe functions without anyunsafeblock in their body, the type system also checks that the function\nbody complies with the function type. However, it is not the case for a safe abstraction. It is the programmer’s\njob to ensure the function body satisfies what the function type announces to the safe world. As an example,\nlet us look at Listing 2. The methodsetis a safe abstraction. Notice that its signature is safe and it gets\nan argument of type&'a selfthat is a shared reference to an object ofstruct Cell. While it has only a\nshared reference to the object, using anunsafeblock and dereferencing a raw pointer, it writes to the contents\nof the object. The code mutates the contents of memory through a shared reference! It is in contradiction\nwith the core rules of the type system. Recall that one of the guarantees of a shared reference type is that\nno mutation would happen during the reference’s lifetime. But thissetmethod is not a horrible mistake.\nThe fact that there is a shared reference together with the type system’s guarantees implies there is a valid\nchunk of memory containing a validCellvalue. If we could make sure all aliases of aCellobject are limited\nto just one thread there would not be a memory safety issue. There are other type checks regarding sending\nownership and borrows to other threads. Because of those checks and the code lineimpl !Sync for Cell {}\nin our example, the type system does not allow sending a shared reference of aCellobject to another thread.\nMoreover, no public method inCelllibrary leaks a reference to the internal state of aCellobject. That\nprevents sendingdeep pointersof theCellto other threads. These together means libraryCellholds the\nfollowing property: All aliases of aCellobject remain in the same thread. That would be ourCelllibrary\ninvariant. The usage ofunsafecode inCelllibrary is sound and abstracts away theunsafeblock. The\nlibrary adds the functionality of mutation through shared reference, but because of its invariant, it is still\nsafe. Safe code can useCellobjects without the necessity of taking care of memory safety. Our example is\nclose to what the realstd::cell::Cellin the standard library is. Libraries that abstract away their unsafe\nsuperpower application from their user, usually guarantee memory safety by holding such invariants. Mutating\nan object’s internal state through shared references, abstracted from the user code, is calledinterior mutability\nandstd::cell::Cellis the most basic form of interior mutability in Rust.\n2.2 Unsound Unsafe\nNot allunsafeusages are sound. It is easy to use an unsafe superpower and end up with undefined behaviour\n(UB). Recall that raw pointers are C-style pointers and dereferencing a null or dangling raw pointer is UB.\nEven worse, a safe abstraction’s body may not satisfy the guarantees the function signature describes. Listing\n3 shows examples for both cases. The functionbreaks_ty_sysin this example does not access unallocated\n5\n\npub fn deref_null() {\nlet ptr = 0x0usize as *mut i32;\nunsafe {\n*ptr = 42;\n}\n}\npub fn breaks_ty_sys(rrx: &mut &mut i32) {\nlet ptr = rrx as *mut &mut i32 as *mut *mut i32;\nunsafe {\n*ptr = 0x0usize as *mut i32;\n}\n}\nListing 3: Unsoundunsafecode examples\nmemory. However, it violates the type system guarantees that type checker always assume when it checks safe\ncode. In such cases, the problem might show up in the execution of safe code. In general, writing soundunsafe\ncode is very difficult, especially in the presence of Rust language constructs such as higher-order functions,\ntraits and panics that complicate the task of analyzing the possible behaviors of a piece of code.\n3 Modular Symbolic Execution (MSE)\nRust has a rich type system that checks memory safety statically. But its soundness relies on the soundness\nof the libraries that apply unsafe superpowers. Programmers who develop these libraries, being human, make\nmistakes. A single memory safety bug in anunsafeblock encapsulated in a library that is used by a program\nrenders all of the type system’s guarantees void. Here is the point we are targeting to contribute to Rust\nsafety. To verify soundness of safe abstractions andunsafecode behind them, we propose applyingModular\nSymbolic Execution(MSE) onunsafecontaining parts of programs and observing if all the memory accesses\nthrough raw pointers are safe and if safe abstractions are right about what they suggest to the safe world by\ntheir interface types. The latter is, checking if safe abstractions implement exactly what their signature/type\nmeans. Here, arises a more fundamental question. What do Rust types mean? We need to answer this question\nbefore we could check the bodies of safe abstractions against their type’s meaning. Fortunately, we do not\nneed to propose an answer from scratch. RustBelt [8] already suggests formal semantics for Rust’s types. In\nthis section, we give a brief example-driven explanation of the Modular Symbolic Execution (MSE) of Rust\nprograms. Later, in Section 4 we briefly discuss RustBelt [8], a well-respected work that suggests a formal\nsemantic model for Rust’s types. Moreover, we will explain why we have chosen to use its semantic model\nand we show a more sophisticated motivating example of the MSE algorithm leveraging RustBelt’s semantic\nmodel.\nListing 4 shows parts of a library that implements aDeque(double-ended queue) all usingunsafecode.\nThis library’s functions receive and return Deque instances just using raw pointers. In Rust, having a raw\npointer does not guarantee anything about the memory it points to, e.g. the type checker does not count on\nanything about the pointee of the returned raw pointer fromcreate_deque. That means trying to verify this\nexample we would need to checkcreate_deque’s body against fewer type-induced proof obligations which\nsimplifies the introduction to our MSE. Later in 4.1, we will discuss an example of MSE of a safe abstraction,\nwith types that represent more guarantees.\n3.1 Concrete Execution\nWe are trying to show no execution ofunsafecode performs memory access violations and neither violates\nthe type system’s guarantees. In the Deque example, it just suffices to make sure our implementation does\nnot perform memory access violation. Let us assume we chose the most naive solution. We decide to verify\nthe Deque by executing all of its possible executions and observe if they access memory chunks that they do\nnot have any right to.\nWe execute our program on an abstract machine.StoreandHeaptogether are the state of the machine.\nStore is a function that maps variables to their current value. Heap is an accounting of the abstract machine’s\nmemory. Mathematically, Heap is amultisetof heap chunks. Heap chunks are predicates applied to arguments\n6\n\nuse std::ptr::addr_of_mut;\npub struct Node {\nprev: *mut Node,\nvalue: i32,\nnext: *mut Node,\n}\npub unsafe fn create_deque() -> *mut Node {\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\n// ...\nListing 4: A Deque, implemented just usingunsafeRust\nthat represent information about the memory. We use predicates from VeriFast’s dialect of Separation Logic.\nSeparation Logic is a logic family, developed specifically for reasoning about pointer-manipulating concurrent\nprograms. We will talk more about VeriFast in Section 5.\nLet us start by executing thecreate_dequefunction. Store and Heap are empty at the beginning and\nthe first statement islet sentinel: *mut Node = std::alloc::alloc(...) as *mut Node;. From the\ndocumentation ofstd::alloc::alloc, we know that if the function returns, either it has failed to allocate\nthe requested memory and the return value is anullraw pointer or it has allocated required memory in which\ncase we know the following.\n1. The address stored insentinelis notnull\n2. The address stored insentinelis aligned\n3. Adequate number of bytes to store an instance ofNodeare allocated at the address stored insentinel\n4. Up until deallocating this memory block, no other part of the program can allocate any of these bytes\nAfter the execution of this line, there are different possible machine states. In one state, the value in the\nsentinelcould benull, in another one0x1000, and in another one0x12345. In the states where the\nsentinel’s value is notnull, there are chunks, batches of bytes, allocated in Heap that our program is\nallowed to access. But since the memory has just been allocated, we do not know anything about the values\nstored in those bytes. The memory is not yet initialized after allocation and we do not have any guarantees\nabout the validity of values stored in it. That is why we are representing them with the special valueh. In Rust\nproducingan invalid value is considered UB. “Producing a value happens any time a value is assigned to or read\nfrom a place, passed to a function/primitive operation or returned from a function/primitive operation” [12].\n“An integer [. . . ], floating point value [. . . ], or raw pointer obtained from uninitialized memory, or uninitialized\nmemory in astr” [12] are invalid values. To reflect this, if a program attempts to read ahvalue our execution\nalgorithm gets stuck, i.e. does not verify the program.\nIt is worth noting we do not want to verify our program against a specific concrete machine, and it\nmeans the set of possible addresses is practically infinite. Thanks to the non-determinism of the address that\nstd::alloc::alloc(...)returns, there are practically infinitely many possible states after executing this line\nof code. We can show program execution paths in a tree which branches whenever there are different possible\noutcome states after executing a statement. Figure 1 shows theconcrete execution treeforcreate_deque.\nWe represent the information we know about the allocated block of memory in Heap using the following heap\nchunks.\n1.malloc\nblockNode(0x1) means there is an allocated block of memory starting from address0x1with\nsufficient bytes to store an instance ofNode.\n7\n\nStore:\nHeap:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,h)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x0\nH:\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,h)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,h)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,h)\n. . .\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\nS:sentinel=0x1\nH:mbN(0x1),Np(0x1,0x1)\nNv(0x1,h),Nn(0x1,0x1)\nS:sentinel=0x2\nH:mbN(0x2),Np(0x2,0x2)\nNv(0x2,h),Nn(0x2,0x2)\n. . .\nreturn sentinel;return sentinel;\nFigure 1: The concrete execution tree of functioncreate_dequein Listing 4. The predicate names have been\nabbreviated in this figure as follows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, and\nNode\nnext→Nn\n2.Node\nprev(0x1,h) means the address0x1plus offset of fieldprevofstruct Nodeis an aligned memory\naddress and points to enough bytes allocated to hold a value of the type of the fieldprev, i.e.*mut Node\nand no other thread knows about this bunch of bytes, i.e. we have write and read access to those bytes.\nThe second argument,h, is the current value stored in those allocated bytes.\n3.NodevalueandNodenextsimilar toNodeprev\nLooking at Figure 1 we have an execution path in whichsentinel==0x0, marked by red and infinitely many\nexecution paths, marked by green, in whichsentinel!=0x0, i.e. the ones where memory allocation succeeded.\nIn case of memory allocation failure, the program aborts by a call tostd::alloc::handle_alloc_error(...).\nIn case of successful allocation with the state withsentinel==0x1, we have to execute the subsequent write\noperations.\naddr_of_mut!((*sentinel).prev).write(sentinel);is a write to fieldprevof aNodememory block\nat the address stored insentinel, on this path0x1. This write is safe because in our Heap we have the\npredicateNode\nprev(0x1,h). After the write the value stored in the field gets updated,Nodeprev(0x1,0x1).\nIf there was no such chunk in Heap, our execution algorithm would get stuck, representing that the program\nis attempting to access memory, without being sure that it has the right to do so. The next write operation\nis safe similarly. The final statement isreturn sentinel;. Representing the return procedure involves many\n8\n\ndetails. Since our goal here is to explain modular symbolic execution, we don’t discuss possible cases and keep\nourselves focused on this example. Here, the value of the localsentinelgets copied into the return place.\nNotice that we still have the memory chunks produced in the Heap. The execution finished successfully and\nthis path is fine. Note that, since the execution tree is (practically) infinite, traversing it entirely according to\nthe procedure described here is (practically) impossible in finite time.\n3.2 Symbolic Execution\nInstead of dealing with infinite concrete execution trees, it is possible to abstract away some details that make\npaths distinct and represent infinitely many of them using a single one. To do so we usesymbols instead of\nconcrete values. Using symbols, we forget about corresponding concrete values, but we still remember the\nfacts that hold for all of them. In this text, we typeset symbols likêsym, to make them distinct. Back to\nour example, to represent the address stored insentinelafter allocation we choose a symbol, let us say\n̂\nl,\nand also store the facts we know about it. We will have a single symbolic execution path for the case of\nallocation failure which in\n̂\nl=0x0and another symbolic execution path representing all the concrete paths\nwhere memory allocation is successful. In all of the successful paths,\n̂\nl6=0x0and the Heap chunks at address\n̂\nl\nwould be produced. To represent a symbolic execution state, we show the symbolic Store as\n̂\nstore, the symbolic\nHeap as\n̂\nheap, and thepath conditionas\n̂\npath\ncond. The path condition is our knowledge base about symbols.\nWe store the persistent facts we know about symbols in it. Figure 2 shows the finitesymbolic execution tree\ncorresponding to the practically infinite concrete execution tree shown in Figure 1.\nThe execution using symbols and facts we know about them is calledSymbolic Execution. It is modelling of\nthe concrete execution. Executingcreate_dequesymbolically, when we want to check if a write toNode.prev\nfield is safe, we do the same as what we did in concrete execution, except that instead of checking the existence\nof aNode\nprevchunk with a concrete value as the address we look for one with a term provably equal to\n̂\nlas\nits address. Both symbolic execution paths ofcreate_dequeare safe. The safety of the path with successful\nallocation implies the safety of infinitely many corresponding concrete paths.\n3.3 Modular Symbolic Execution\nThe preceding subsection showed how symbolic execution algorithm successfully verifiescreate_deque. It\nalso showed that after executing it there would be chunks of aNodestruct instance in the Heap at the address\nthe function returns and the same address is stored inprevandnextfields of thatNodeinstance in the heap.\nMoreover, thevaluefield is uninitialized. Now, what if we try to verify a program that callscreate_deque\nseveral times. Executing the body of functions over and over is a waste. Even worse, in the case of loops and\nrecursive functions, our symbolic execution algorithm may not terminate. We also like to verify our programs\nin a modular way, e.g. it is not pleasant to get involved with internal states of callees when we try to verify\na caller. It would be useful, if we could save/document the knowledge we learn about the body of a function\nby symbolically executing it. Then instead of executing the body every time the function gets called, we can\nreuse that knowledge to infer what would be the state of execution if the call returns. This knowledge is\ncalledfunction contract. Generally, we like a function’s contract to tell us what is the weakestpre-condition,\ni.e. set ofrequirements, for this function which if it holds no execution of the function exhibits UB. That is,\nthe minimal upper bound of the states if we execute the function’s body starting from them, the execution\nwould be safe. We also want the contract to tell us as much as possible about the effects that calling the\nfunction has on the execution state. In other words, what the strongestpostconditionthe functionensuresis.\nThat is, the maximal lower bound of guarantees about outcome states of all safe executions of the function.\nIf a human/verifier provides us with a function contract in a well-defined logic, we can check the contract’s\npropositions against the function body/implementation and if the body satisfies the contract, we can just\nreuse the contract every time we want to check a call to the function. This contract serves the same purpose\nas informal documentation, written in natural languages. But it is comprehensive and machine-checkable.\nListing 5 showscreate_dequeannotated with VeriFast Separation Logic formulas as its contract.\nLet us verify an imaginary call tocreate_dequewith the contract shown in Listing 5, usingMod-\nular Symbolic Execution. First, we should verify thatcreate_deque’s body satisfies its contract. The\nrequiresclause of the contract, i.e.//@ requires true, means to get executed safely,create_dequeneeds\nthattrueholds. Unsurprisingly,truealways holds in Separation Logic. So there are no special require-\nments, i.e. no Heap chunks or facts about symbols, to assume when we start to verify the function. Also,\ncreate_dequehas no parameters, which means there is nothing in the\n̂\nstorewhen we start checking its\nbody. We start verifyingcreate_deque’s body from an empty\n̂\nstore,\n̂\nheap, and\n̂\npath\ncond. In this specific\ncase, we are starting from the same state as when we were executing justcreate_dequesymbolically and\n9\n\n̂\nstore:\n̂\nheap:\n̂\npath\ncond:\nlet sentinel = std::alloc::alloc(...) as *mut Node;\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\nif sentinel.is_null()\n{...}\nif sentinel.is_null()\n{...}\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,h)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\n̂\nS:sentinel=\n̂\nl\n̂\nH:\n̂\nP:\n̂\nl=0x0\naddr_of_mut!\n((*sentinel).prev)\n.write(sentinel);\nhandle_alloc_error(...)\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,h)\n̂\nP:\n̂\nl6=0x0\naddr_of_mut!\n((*sentinel).next)\n.write(sentinel);\n̂\nS:sentinel=\n̂\nl\n̂\nH:mbN(\n̂\nl),Np(\n̂\nl,\n̂\nl)\nNv(\n̂\nl,h),Nn(\n̂\nl,\n̂\nl)\n̂\nP:\n̂\nl6=0x0\nreturn sentinel;\nFigure 2: The symbolic execution tree of functioncreate_dequein Listing 4. The execution paths represent\nthe paths with the same colour in Figure 1. The predicate names have been abbreviated in this figure as\nfollows.mallocblockNode→mbN,Nodeprev→Np,Nodevalue→Nv, andNodenext→Nn\n10\n\nunsafe fn create_deque() -> *mut Node\n//@ requires true;\n/*@ ensures result!=0 &*& malloc_block_Node(result) &*& Node_prev(result, result) &*&\nNode_value(result, _) &*& Node_next(result, result);\n*/\n{\nlet sentinel: *mut Node = std::alloc::alloc(std::alloc::Layout::new::()) as *mut Node;\nif sentinel.is_null() {\nstd::alloc::handle_alloc_error(std::alloc::Layout::new::())\n}\naddr_of_mut!((*sentinel).prev).write(sentinel);\naddr_of_mut!((*sentinel).next).write(sentinel);\nreturn sentinel;\n}\nListing 5:create_dequewith contract, annotated in VeriFast Separation Logic\nnon-modularly. So the next three lines would have the same effect and we do not repeat those execution\nsteps here. Although, there is an interesting difference at the return point. The contract’sensuresclause,\ni.e.//@ ensures result!=0 &*& malloc_block_Node(result) &*& ..., is describing the effect of a call\ntocreate_dequeon the state of the caller, assuming the requirements of the call have been satisfied. So the\nreturn point is the point where we should verify theensuresclause. One of the facts thisensuresclause\nasserts is that when a call tocreate_dequereturns, its mentioned chunks have been added to the Heap. The\nresultkeyword in theensuresclause is a binder for the return value of the function, here, the symbolic\nvalue stored insentinel, i.e.\n̂\nl. To verify theensuresclause weconsumeits mentioned chunks from the\n̂\nheap. That is, we check the existence of the claimed chunks and since their access rights are being transferred\nto the caller, we deprivecreate_dequeof those rights by removing the chunks from\n̂\nheap. It prevents us\nfrom transferring access rights of some Heap chunks to the caller twice. Theensuresclause also mentions a\npersistent fact, i.e.//@ ensures result!=0, which we should check. The check is trivial because the exact\nassertion is in\n̂\npath\ncondat the return point. In our example, after consuming theensuresclause chunks,\n̂\nheapwould be empty. It means we could be sure thatcreate_dequedoes not leak memory chunks. The\ncaller knows about theensuresclause chunks and the responsibility of deallocating them is now upon the\nhigher-level code. Rust’s type system does not provide any guarantees about memory leaking in the presence\nofunsafecode and tracking it is an added value of our MSE algorithm. Now we verified that the contract\nholds. Let us see what happens when we try to verify the call tocreate_dequeassuming the state at the\ncall site is empty. Bycreate_deque’s contract, we know it does not need anything special before calling\nit. So we are good to go. We do not look up anything aboutcreate_deque’s body. The next step of our\nMSE algorithm is to just look upcreate_deque’s contract andproducetheensuresclause. Assuming we\nrepresent the return value bŷr, it leads to addinĝr6=0x0to\n̂\npath\ncondand adding the memory chunks\nmalloc\nblockNode(̂r),Nodeprev(̂r,̂r),Nodevalue(̂r,h),Nodenext(̂r,̂r) to the\n̂\nheap. It captures the effect of\nthe call tocreate_dequeand we can continue the execution of the rest of the caller’s body.\n3.4 Modular Symbolic Execution and Verifying Safe Abstractions\nAs we mentioned at the beginning of this section the Deque example is simple. That is because first, its\ninterface is completelyunsafeand second, it interacts just using raw pointers. This simplicity of interface\ntypes helped us to establish the idea of MSE. It also made us annotate the contract ourselves. In Rust, many\nfacts about a function’s contract are encoded in the function’s type. In safe Rust, the type checker checks\nthe safety of calls to the functions against the information encoded in their types, not an annotated contract.\nThe type checker assumes the body of the function complies with its type. For purely safe functions this\nassumption gets checked during the type checking of the function itself. When it comes to safe abstractions,\nit is the programmer’s responsibility to make sure that the function body complies with its type. Instead\nof verifying statically checked safe code, it is better to just verify that safe abstractions bodies satisfy the\npropositions encoded in their types. To verify a function’s body, we start verifying the body from a symbolic\nstate described by the function’s contractrequiresclause and check the validity of its contract’sensures\nclause at its return point(s). Now that the contract is encoded in the function’s type, we need to represent\n11\n\nthe meaning of the Rust’s types in Separation Logic to use them in the MSE algorithm.\nTo interpret the encoded information in a function type and use them in MSE, we use the semantic model\nprovided by RustBelt [8]. In the next section, we explain RustBelt briefly and using an example we represent\nour plan for Modular Symbolic Execution of safe abstractions based on RustBelt’s semantic model for Rust’s\ntypes.\n4 RustBelt\nRustBelt [8], RustHorn [11], and Oxide [13] are all well-known formal works around Rust. They all suggest\ncalculi that capture Rust’s essence. However, we found RustBelt more suitable for our purposes. RustBelt\nproves Rust’s type safety takingunsafeRust into account, while the two other works do not. To prove the\nsafety of Rust withunsafecode, the popularProgress and Preservationmethod is not useful.unsafeRust is\nnot well-typed respecting safe Rust type system rules and Rust with relaxed typing rules forunsafecode is\nnot type-safe! That is why RustBelt follows the semantic approach usinglogical relationsto prove the safety\nof Rust programs withunsafecode. RustBelt introducesλ\nRust\n, a formal language close to Rust’sMid-level\nIntermediate Representation(MIR). Next, it provides a formal interpretation forλ\nRust\n’s types and typing\njudgments in a dialect of Separation Logic, Iris [2]. This interpretation is the semantic model they provide\nforλ\nRust\n’s type system. Then they prove the safety ofλ\nRust\nusing this semantic model following three steps,\nwhich have been mentioned in RustBelt [8] paper as follows.\n1. “Verify that the typing rules ofλ\nRust\nare sound when interpreted semantically, i.e. as lemmas establishing\nthat the semantic interpretations of the premises imply the semantic interpretation of the conclusion.\nThis is called thefundamental theorem of logical relations.”\n2. “Verify that, if a closed program is semantically well-typed according to the model, its execution will\nnot exhibit any unsafe/undefined behaviours. This is calledadequacy.”\n3. “For any library that employsunsafecode internally, verify that its implementation satisfies the predicate\nassociated with the semantic interpretation of its interface, thus establishing that theunsafecode has\nindeed been safelyencapsulatedby the library’s API. In essence, the semantic interpretation of the\ninterface yields a library-specific verification condition.”\nWith fundamental and adequacy theorems together, we have thatsyntactically well-typed programs are safe.\nIn comparison with the syntactic approach for safety proofs, i.e. Progress and Preservation, there is an\nindirection in this semantic proof style. Intuitively, in progress and preservation, we show syntactically well-\ntyped programs are safe, but here we show syntactically well-typed programs are semantically well-typed and\nthen, semantically well-typed programs are safe. This indirection requires us to define a semantic model and\nmakes the proof longer and harder. The reward of this extra effort, however, is that by the Adequacy theorem\nwe can also show the safety of programs that are just semantically well-typed. This is the case mentioned in\nthe third step of RustBelt’s safety proof above.\nIntuitively, in our approach using MSE, we are following RustBelt’s step three. By our MSE we are proving\nno execution of functions of theunsafeapplying library violates their type’s meaning. We will talk about the\ndifferences between our approach and RustBelt, later in the Subsection 5.3. The semantic model RustBelt\nprovides is exactly what we needed in Section 3 as the formal meaning of the interface of a safe abstraction.\nTo be precise, Iris which RustBelt uses to represent its semantic model is not just a logic. It is a framework\nfor higher-order concurrent separation logic that can be used for reasoning about the safety of concurrent\nprograms. The fact that RustBelt is also using Separation Logic for its semantic model, makes it easier for us\nto use. Recall that we are using a dialect of Separation Logic in our MSE as well. In the next Subsection, we\ndiscuss using RustBelt’s semantic model in our MSE algorithm.\n4.1 RustBelt’s semantic model and MSE\nListing 6 shows the methodsetof our simplifiedCellimplementation shown in Listing 2. It has a\nlifetime parameter'a, and two normal parameters. The interesting one is&'a self. It is a shorthand\nforself: &'a SelfandSelfin our case isCell. Our de-sugared parameter would beself: &'a Cell,\na parameter namedselfof type&'a Cell, i.e. a shared reference. A reference type carries much more\ninformation than a raw pointer.self’s type tells us the following.\n1. Until the end of the time period denoted by lifetime'a, the following guarantees hold:\n12\n\npub fn set<'a>(&'a self, n: i32) {\nlet value_mut_ptr = &self.value as *const i32 as *mut i32;\nunsafe {\n*value_mut_ptr = n;\n}\n}\nListing 6: A safe abstraction method\nJ&\nκ\nshr\nτK.size:= 1(1)\nJ&\nκ\nshr\nτK.own(t,\nυ) :=∃`.υ= [`]∗JτK.shr(JκK,t,`)(2)\nJcellK.shr(κ,t,`) := &\nκ/t\nna\n(∃\nυ. `7→υ∗JintK.own(t,υ))(3)\nListing 7: RustBelt’s predicates related to interpreting a shared reference toCelltype\n1\n2. The parameterselfcarries an aligned non-null address.\n3. There are enough bytes to store aCellvalue allocated at the address stored inself.\n4. There is a validCellvalue stored there.\n5. The memory region does not overlap with any memory region, owned by any active owning variable or\nreferred to by any active mutable reference, i.e. the memory would not get mutated by anyone. Although,\nother shared references to the memory region may exist, e.g. other threads may read it.\nWe need this information in a formal form. Let us go through RustBelt’s semantics for this shared pointer\nbriefly. In RustBelt “Each typeτis interpreted by a tupleJτK= (size,own,shr) of a natural number and\ntwo Iris predicates” [8]. Listing 7 shows RustBelt’s predicates used for interpreting&'a Celltype.\nDefinition 1 of thesizevalue for shared references toτunder lifetimeκshows that all shared references\nare of size 1 memory unit. Definition 2 of theownpredicate for shared references toτunder lifetimeκhas an\ninteresting meaning. Its body uses theshrcomponent of the interpretation of typeτ, i.e.JτK.shr(JκK,t,`).\nThis represents the fact that to have a shared reference to a typeτhas different meanings depending onτ.\nThat is why RustBelt defines ashrcomponent for the interpretation of every type\n2\n. Continuing to explore\nthe meaning of predicateownfor our shared reference to aCell, we need the definition of predicateshrof\nCell’s interpretation. It is shown in Definition 3. Before we explain it we need to know about RustBelt’s\nlifetime logic.\nTo facilitate expressing and reasoning about temporary and potentially shared ownership of resources in\nIris, RustBelt introduces a lifetime logic as an Iris library. To introduce these different kinds of ownership, this\nlibrary relies onborrows, which are proposition constructors. The notation &\nκ/t\nna\n...is a kind of borrow named\nnon-atomic persistent borrowthat represents thread-dependent temporary and potentially shared ownership.\nIt is used to interpret theCelltype. Let us explore the information this borrow and lifetime logic rules\nrepresent aboutCell. We need to know about them to explain the MSE ofCell::set.\nRecall that the typeCellallows clients to mutate its contents through a shared reference. That happens\nby applying anunsafesuperpower in itssetmethod. Having a shared reference does not rule out aliasing.\nSo mutating data through shared references suggests the possibility of data races. To keepCellusages safe,\nwe should make sure all of its aliases remain in the same thread. Fortunately, the type system takes care of it.\nThe code lineimpl !Sync for Cell {}, means values of typeCellare notSync. That means they cannot be\naccessed simultaneously from different threads. In the Rust type system it means values of type&'a Cellare\nnotSend, i.e. shared references to values of typeCellare not send-able to other threads. Moreover, no public\nfunction inCellleaks a deep reference to its contents. These facts together, prevent concurrent accesses to\nthe memory owned by aCelland safe world can useCellwithout worrying about data races.\nIn RustBelt a typeτisSend, if and only if, theJτK.own(t,υ) definition does not depend on the thread\nidentifiert. A typeτisSync, if and only if, the type of shared references toτ, i.e. &\nκ\nshr\nτ, isSend. The fact\n1\nSome details has been dropped for simplicity. For complete definitions see [9].\n2\nWe are not showing the definition of the componentshrfor shared references. It is not of interest in this example.\n13\n\n(\n&\nκ/t\nna\nP\n)\n∗[κ]\nq\n∗[Na:t]≡−\n∗\n.P∗\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n(4)\nListing 8:LftL-na-accrule from RustBelt’s lifetime logic\nthatCellis notSynchas been reflected in RustBelt’s interpretation as follows. The &\nκ/t\nna\nwhich has been used\nin theshrcomponent ofJcellKdepends on the thread identifiert. In shortCell’s sharing predicate depends\non the thread identifier. SinceJ&\nκ\nshr\nτK.own, shown in the Definition 2, consists ofJτK.shr,J&\nκ\nshr\ncellK.own\ndepends ontas well, reflecting that shared references toCellare notSend.\nThe interesting point in proving RustBelt’s step three aboutCell::setis that we need full/write access to\nCell’s content to be sure the write operation is safe. To understand how we can obtain such access, we need\nto look at the lifetime logic’s rules that provide us access to the resources held by a borrow. In our example,\nthe resources held by a non-atomic persistent borrow. Listing 8 shows ruleLftL-na-accof lifetime logic.\nThis is the rule we are looking for.\nIt describes how we can get full access to a resourcePwhen we have it under a non-atomic persistent\nborrow. Besides &\nκ/t\nna\nPitself, the rule requires [κ]\nq\nand [Na:t] . Intuitively, in theCell::setexample if we\nprovide a witness that lifetime'ais alive and we are in the same thread that theCellitself is we can get our\nfull access. But there is more than that about [κ]\nq\nand [Na:t] . Let us explain them in order.\n[κ]\nq\nis the lifetime logic’slifetime token, representing lifetimeκis alive/ongoing. That is the same lifetime\nas the one that appears in the non-atomic persistent borrow itself. To give us the resourceP, this rule requires\nus to provide evidence that the borrow lifetime is alive; fair enough. The fractionq, such that 0< q≤1, in\nthe lifetime token plays an important role. Whenever a lifetime starts, we get its token with the full fraction,\n[κ]\n1\n. The lifetime logic’s rules about accessing borrows consume a fraction of the lifetime token for a borrow’s\nlifetime, besides other requirements, to provide us with:\n1. Access to the resources behind the borrow. Represented inLftL-na-accbyP.\n2. Anupdatewhich takes back the borrowed resource and gives back the lifetime token fraction that\nhad been used when the rule was applied to provide the resource. In the case ofLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\npart.\nIn lifetime logic, we cannot show a lifetimeκis ended unless we consume its token with the full fraction. It\nmeans we need to take back all the fractions that have been used to get access to resources behind borrows\nunderκ. Taking the fractions back is just possible through those updates we just mentioned, in the case of\nLftL-na-accthe\n(\n.P≡−\n∗\n[κ]\nq\n∗[Na:t]\n)\n. Those updates always need the resources they have handed out,\nback. That is, to end a lifetime, we are forced to make sure all the permissions granted through borrows under\nthat lifetime have been taken back. Intuitively, the aliveness of a lifetime is a credit, we borrow access to\nresources relying on that lifetime and to end that lifetime we should have paid our debts to the lifetime back.\nMoreover, the rule requires the non-atomic token [Na:t], bound to the same thread as the non-atomic\npersistent borrow. “This token is created at the birth of the thread, and threaded through all of its control\nflow. That is, every function receives it and has to return it.” [8] The same scenario of consumption and giving\nback of [κ]\nq\ninLftL-na-acchappens for [Na:t] too. It means at return points we need [Na:t] back and to\nhave that again we need to give back the resource we have granted usingLftL-na-accrelying on the fact that\nwe are in threadt. Intuitively, at the function’s return point, it gets checked that whatever thread-dependent\nresource has been taken, has been given back.\nBack to our MSE algorithm, starting from a symbolic state containing RustBelt’s predicates extracted from\nCell::set’s type, we should be able to extract the facts we need to verifyCell::set’s body. Moreover we\nneed to check the integrity of the type system invariant at return points. To keep the text concise, we skip the\ndetails. Using what we learned from RustBelt’s semantic model and its lifetime logic, the outline of our MSE\nfor safe abstractionCell::setwould be as follows: Since, by Rust’s type system, it is always guaranteed that\nthe instantiations of a function’s lifetime parameters outlive the function execution period, at the beginning\nof the function, we have a fraction of the lifetime token for each lifetime parameter. The function’s execution\nperiod is a lifetime, always shown by binderF. Obviously, function execution is happening in a thread; so we\nget a non-atomic token for the current thread. And of course, we get theowncomponent of the interpretation\nof the type of the function’s parameters. That gives us the symbolic execution state, shown in row number 1\n14\n\nof Table 1, to start our symbolic execution\n3\n.\nTable 1: Modular Symbolic Execution of the safe abstraction methodCell::set.\nFor all rows\n̂\nstore={self:̂s,n:̂n}and\n̂\npath\ncond={F v̂a,0<̂q≤1}.\n#Rust̂resource\n1fn set<'a>(...)\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,J&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\n2//@open shr.own\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,JcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n3//@open cell.shr\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\n,&\n̂a/\n̂\nt\nna\n(\n∃\nυ.̂s7→υ∗JintK.own(\n̂\nt,υ)\n)\n4//@lemma lftl_na_acc\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n5*value_mut_ptr = n;\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n,\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,υ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\n6//@apply update s|->n\n[\nNa:\n̂\nt\n]\n,[̂a]\n̂q\nTo justify the write inCell::setwe need write permission for theCell’s content. We can get ac-\ncess to corresponding memory chunks by opening theJ&\n̂a\nshr\ncellK.own\n(\n̂\nt,[̂s]\n)\nto its definition which gives us\nJcellK.shr\n(\n̂a,\n̂\nt,̂s\n)\n. By opening the latter again, we would have the symbolic execution state in the row number\n3 in Table 1.\nNow usingLftL-na-accshown in Listing 8 we can get write access. But recall that the rule also needs to\nconsume a fraction of borrow lifetime token, i.e. [̂a]\n̂\nq\n′\n, and the non-atomic token bound to the current thread,\ni.e.\n[\nNa:\n̂\nt\n]\n. Because we do not need [̂a] for the rest ofCell::setbody to get access to another borrow, we\ncan just give all the fraction of [̂a] we have toLftL-na-acc. After applying the rule we have the symbolic\nstate shown in the row number 4 in Table 1.\nThe write can be verified now because we have full access to the Heap chunk̂s7→\nυ. The write operation\nupdates the value of the chunk giving us the updated resource\n(\n̂s7→[̂n]∗JintK.own\n(\n̂\nt,[̂n]\n))\n. The state is\nshown in the row number 5 of Table 1. By the next statement,Cell::setreturns.Cell::set’s return type\nis not shown explicitly which in Rust means it is(), i.e. the unit type. To closeJ()K.own(\n̂\nt,[]) does not\nneed any resources so we can easily close it out of thin air. There is no destructor call happening here as\nwell. As a check for preserving the type system invariant at the return point, we consume whatever fraction\nof external lifetime tokens we got for lifetime parameters. In the case ofCell::setthere is just'a. So we\nneed to consume back [̂a]\n̂q\n. By doing so we make sure whatever resources we have granted from borrows under\n'a, we are giving back to the caller. Recall that to have [̂a]\n̂q\nand\n[\nNa:\n̂\nt\n]\nback, we need to use the update\n(\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n≡−\n∗\n[̂a]\n̂q\n∗\n[\nNa:\n̂\nt\n]\n)\nin our̂resource. Using the update needs consuming the\ngranted resource\n(\n∃\nυ.̂s7→υ∗JintK.own\n(\n̂\nt,\nυ\n))\n, i.e. giving it back. The caller needs to take back the lifetime\ntoken fraction provided to call the current function. Another obvious return point verification is consuming\nthe non-atomic token with the current thread binder,\n[\nNa:\n̂\nt\n]\n. Recall it is being threaded through all the calls\nin a thread.\nOur target claim is that, for atype-checkedprogram, if the MSE algorithm successfully executes all safe\nabstractions and the wholeunsafehierarchy of code behind them, no execution of that program will exhibit\nUB. In RustBelt’s terminology, that means if our MSE algorithm verified a safe abstraction, there exists a\nRustBelt proof to show the safe abstraction holds its interface type guarantees. In short, we intend for our\nMSE algorithm to be sound regarding to step three of RustBelt’s safety proof mentioned at the beginning of\nthis section.\n5 Implementation\nTo evaluate our MSE algorithm on non-trivial examples and case studies, we are implementing our algorithm to\nhave a tool to symbolically execute Rust programs. There are two important questions needed to be addressed\nregarding our implementation. First, which representation of Rust we should symbolically execute and second,\nhow we can reuse the capabilities of the existing research tool VeriFast to implement our algorithm.\n3\nTo show our purpose clearer, we dropped details regarding the facts that in RustBelt there is no mutable store and all locals,\ni.e. parameters and local variables, are owned pointers. We are just showing them here as store variables.\n15\n\n5.1 Executing MIR\nSurface Rust has a heavily sugared syntax and there is no formal operational semantics by the language\ncommunity for it. MIR, however, is heavily simplified by the compiler. In MIR, temporary values of higher\nrepresentations of Rust programs are bounded and function bodies are represented in the form of a Control-flow\nGraph. But the essence of ownership and borrowing representing types is still preserved in this intermediate\nrepresentation. Generic definitions are also still in place in MIR. Therefore, it is much simpler and easier\nto execute and reason about MIR instead of surface Rust while having interesting properties of language in\nhand to work with. Both RustBelt and RustHorn calculi,λ\nRust\nand COR respectively, are inspired by MIR\nwitnessing this fact. Moreover, to compensate for the lack of formal operational semantics, the language\ncommunity relies on a MIR interpreter named MIRI. It is much easier to refer to MIRI to see what exactly\nthe semantics of a program is. That is why we decided to symbolically execute MIR representation in the\nbackground. To get the MIR representation of a program along with type definitions and user annotations,\nwe have implemented a Rust program which uses the official Rust compiler front-end to type and borrow\ncheck the program and generate its MIR. Using the official compiler front-end saves a lot of work and also\nprevents our tool to diverge from what exactly the Rust compiler is. If the program passes the front-end\nchecks successfully, our tool translates all required information to Cap’n Proto [3] data structures and dumps\nit to standard output. Cap’n Proto is a data interchange format supported in many different programming\nlanguages. This makes our MIR extraction program reusable for other Rust analyser tools.\n5.2 Executing MIR in VeriFast\nFortunately, we do not need to implement a symbolic execution tool capable of reasoning about Separation\nLogic propositions from scratch. VeriFast is a research tool for verifying C and Java programs annotated\nwith VeriFast’s dialect of Separation Logic and VeriFast’s ghost commands. Extending VeriFast to support\nRust, or more accurately to support MIR, spares us implementing the executing and reasoning engine from\nscratch. To symbolically execute MIR in VeriFast, our approach is to translate MIR, Rust’s types semantics,\nand user annotations together into VeriFast’s C abstract syntax tree (AST). By doing so, we are effectively\ndefining an operational semantics for MIR using VeriFast’s C operational semantics. A similar process of\ndefining operational semantics forλ\nRust\nby translating it to another language happens in RustBelt. “The\noperational semantics ofλ\nRust\nis given by translation into a core language. The core language is a lambda\ncalculus equipped with primitive values, pointer arithmetic, and concurrency” [8].\nSince MIR is a control-flow graph, translating the code control-flow to C control constructs is straightfor-\nward. For some data types, there are direct equivalents, e.g.booland more or less integers; some others do\nnot have direct equivalents but it is still easy to translate them. As an example, the approach for translating\ntuples is using Cstructs with reserved names. For more complex Rust types that are not fully representable\nby C types, as already mentioned, the approach is to add RustBelt type semantics represented in VeriFast’s\nSeparation Logic. The examples in appendix A illustrate our intention for generating RustBelt rules and\npredicates for a safe abstraction\n4\n.\nAt the time of writing this report, the tool can verify a simple example of memory allocation, access\nand un-allocation, shown in Figure 3. Even this simple example includes two generic functions whose defini-\ntions are parameterised by a type. The instantiations of functionsnewandis_nullused in the example are\nstd::alloc::Layout::new::()andstd::ptr::mut_ptr::::is_null(*mut u8)respec-\ntively. Generic definitions are not generally handled yet. For these cases, we substitute with equivalents of\ntheir instantiated implementation.\nThe MIR extraction program and the VeriFast extension for supporting Rust are works in progress and\ncurrently support a very limited subset of Rust. The development of VeriFast including the MIR extractor\nprogram is being done in branchrustin a fork of VeriFast that can be found athttps://github.com/\nNima-Rahimi-Foroushaani/verifast. The current status of the code including theallocexample shown in\nFigure 3 is available as a Zenodo drop athttps://doi.org/10.5281/zenodo.7472607. To build and run the\ncode follow the instructions provided along with the Zenodo drop.\n5.3 Added value with respect to RustBelt\nA valid question then is that while RustBelt already exists why should we bother to enhance VeriFast to verify\nRust programs withunsafecode. To verify the safety of a new library with RustBelt one would need to\nhave considerable knowledge about Iris in the first place. Moreover, it would be necessary to translate the\n4\nThe mentioned examples have been provided by Prof. Bart Jacobs.\n16\n\nFigure 3: The alloc.rs Rust program verified by VeriFast\nsurface Rust code toλ\nRust\n. After all, it is just the starting point to the safety proof of the program. In\nour approach, however, the required knowledge is VeriFast separation logic and our intended encoding of the\nRustBelt semantic framework including lifetime logic in VeriFast. VeriFast would work with the surface Rust\nand the translation to MIR happens in the background using the Rust compiler front-end. That reduces the\nburden of learning for Rust developers who aim to verify their code. On the other hand, our approach leads to\nhaving actual Rust code and VeriFast annotation, i.e. verifiable formal documentation, together in the same\nplace. Our hypothesis is that it leads to a better information encoding scheme for practicality. Listing 9 shows\nan actualunsafefunction from the Rust core library with a hypothetical VeriFast annotation along with a\npart of corresponding informal documentation.\n6 Future Plans\nIn subsection 5.3, we mentioned some practical added value for verifyingunsafeRust using VeriFast in\ncomparison with RustBelt. But we plan to contribute further to the safety of Rust ecosystem in other ways\n/// ...\n/// Behavior is undefined if any of the following conditions are violated:\n/// * Both `x` and `y` must be [valid] for both reads and writes of `count *\n/// size_of::()` bytes.\n/// * Both `x` and `y` must be properly aligned.\n/// * The region of memory beginning at `x` with a size of `count *\n/// size_of::()` bytes must *not* overlap with the region of memory\n/// beginning at `y` with the same size.\n/// ...\npub const unsafe fn swap_nonoverlapping(x: *mut T, y: *mut T, count: usize)\n//@ requires Interp_own(T)(x,?vs1) &*& Interp_own(T)(y,?vs2) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n//@ ensures Interp_own(T)(x,?vs2) &*& Interp_own(T)(y,?vs1) &*& length(vs1)==count &*&\nlength(vs2)==count↪→\n{...}\nListing 9: Anunsafefunction from Rust core library with a hypothetical VeriFast annotation\n17\n\nas well in the future. In subsection 6.1 we explain the possibilities of further formal work to establish the\nsoundness of our MSE algorithm. One of the problems we are targeting to address in VeriFast is the safety\nproblems that occur in the presence ofunsafecode and stack unwinding. In subsection 6.2 we discuss the\nproblem and why our implementation shows promise to solve that.\n6.1 Rigorous Soundness\nOne could rightfully argue about the soundness of our MSE algorithm respecting RustBelt proofs. To support\nour soundness claim rigorously, there are two possible approaches. One is to formalize our MSE algorithm\nbased onλ\nRust\n’s operational semantics and prove that if it verifies a function there is a RustBelt proof for the\nsafety of the function as well. Another approach is to generate a function-specific Iris proof out of executing\nthe function. For that, we need to define a function between a passed/verified symbolic execution tree of a\nfunction and a RustBelt soundness proof about it.\n6.2 Panic Safety and Stack Unwinding\nAccording to The Rustonomicon [12], Rust’s error handling scheme is as follows:\n•If something might reasonably be absent,Optionis used.\n•If something goes wrong and can reasonably be handled,Resultis used.\n•If something goes wrong and cannot reasonably be handled, the thread panics.\n•If something catastrophic happens, the program aborts.\nAlthough, the first two, are recommended and common ways of reporting unhappy results, there are many\nplaces Rust code may panic. “Panics cause the thread to halt normal execution and unwind its stack, calling\ndestructors as if every function instantly returned” [12]. A program can recover from panic and handle it using\nstd::panic::catch_unwind. On the other hand,std::process::abort, immediately terminates the current\nprocess. In the case of panic, the compiler takes care of the safety and the cleaning up in the unwinding\nexecution path. Once again, when it comes tounsafecode, the information encoded in types is not enough\nto be sure about safety. In presence of theunsafeblocks, “code that transiently creates unsound states must\nbe careful that a panic does not cause that state to be used” [12]. Listing 10 shows an example of such bugs,\ninspired by a real-life one [5]. This kind of bug is hard for a human to track. Programmers need to constantly\nkeep the probability of panic in mind and address all of the transient unsound states. Fortunately, the bug\nfrom the standard library has been fixed. But notice that it is a mistake made by experts. This kind of bug is\nstill showing up now and then in the ecosystem. That is why RUDRA [4] aims for this bug’s pattern as one\nof its targets. While RUDRA is a valuable static analyzer which has made the language ecosystem safer, it\ndoes not guarantee panic safety. The panic execution path becomes explicit once the compiler reduces surface\nRust to MIR. Listing 11 shows a part of the compiled down MIR forsift_upthat has been shown in Listing\n10. It showsBasic Blockbb8where the call to functionle, i.e. operator≤gets executed. One of the possible\nsuccessors of theTerminatorfor this function call corresponds to the case if the function call panics and it is\nbasically a jump toBasic Blockbb23.\nTo address the panic safety in presence ofunsafecode, there are two possible steps to take. First we can\nextend RustBelt with panics and prove the safety of safe abstractions in presence of panic there. Second, since\nin our tool we are symbolically executing MIR in the background, it can naturally take the panic execution\npaths into account. However, the unwinding path does not return a value from the function we are verifying.\nThen not all the guarantees the function type asserts, need to hold. We need to study what the exact necessary\nchecks are to claim theexception safetyof a function after a panic.\n7 Conclusion\nThe problem of verifying the memory safety of Rust programs withunsafeblocks suggests a good opportunity\nto contribute to the safety of the software industry. Our modular symbolic execution approach is inspired by\nthe formal work Featherweight VeriFast [6], relying on the semantic model provided by RustBelt [8]. The solid\nformal foundation we are building upon makes our approach very likely to have solid results. On the other\nhand, in our research path, we keep evaluating our algorithm with real-life scenarios by extending VeriFast\nand using Rust compiler front-end. VeriFast as a verification software has proven to be useful. There is a\n18\n\nuse core::mem::{replace, MaybeUninit};\nuse core::ptr;\npub struct BinaryHeap {\npub data: Vec,\n}\nimpl BinaryHeap {\n// T implements Ord\npub fn sift_up(&mut self, start: usize, mut pos: usize) {\nunsafe {\nlet new = replace(\n&mut self.data[pos],\nMaybeUninit::::zeroed().assume_init(),\n);\n// There is an element with all bytes zeroed\n// which is not necessarily a valid value\nwhile pos > start {\nlet parent = (pos - 1) >> 1;\nif new <= self.data[parent] {\n// What if the '<=' panics!\nbreak;\n}\nlet x = replace(\n&mut self.data[parent],\nMaybeUninit::::zeroed().assume_init(),\n);\nptr::write(&mut self.data[pos], x);\npos = parent;\n}\nptr::write(&mut self.data[pos], new);\n}\n}\n}\nListing 10: An example of memory safety bug in presence ofunsafecode and function call panic inspired from\nRust’s issue 25842 [5]\nbb8: {\n_21 = _22;\n_19 = ::le(move _20, move _21) -> [return: bb9, unwind: bb23];\n}\nListing 11: Part of MIR corresponding to methodsift_uphas shown in Listing 10. Stack Unwinding execution\npath is explicit in MIR\n19\n\nfundamental interest in safety in the Rust community. Integrating the official Rust compiler with VeriFast\nprovides the possibility for Rust ecosystem to improve the safety of language.\nbibliography\n[1]VeriFast.url:https://github.com/verifast/verifast.\n[2]Iris.url:https://iris-project.org/.\n[3]Cap’n Proto.url:https://capnproto.org/.\n[4] Yechan Bae et al. “Rudra: Finding Memory Safety Bugs in Rust at the Ecosystem Scale”. In:Pro-\nceedings of the ACM SIGOPS 28th Symposium on Operating Systems Principles. SOSP ’21. Virtual\nEvent, Germany: Association for Computing Machinery, 2021, pp. 84–99.isbn: 9781450387095.doi:\n10.1145/3477132.3483570.url:https://doi.org/10.1145/3477132.3483570.\n[5]BinaryHeapis not exception safe. Rust issue #25842.url:https://github.com/rust-lang/rust/\nissues/25842.\n[6] Bart Jacobs, Fr ́ed ́eric Vogels, and Frank Piessens. “Featherweight VeriFast”. In:Logical Methods in\nComputer Science11.3 (2015). Ed. by Tobias Nipkow.doi:10 . 2168 / lmcs - 11(3 : 19 ) 2015.url:\nhttps://doi.org/10.2168%2Flmcs-11%283%3A19%292015.\n[7] Ralf Jung.MutexGuard>must not beSync. Rust issue #41622.url:https://github.com/\nrust-lang/rust/issues/41622.\n[8] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language”. In:Proc.\nACM Program. Lang.2.POPL (Dec. 2017).doi:10.1145/3158154.url:https://doi.org/10.1145/\n3158154.\n[9] Ralf Jung et al. “RustBelt: Securing the Foundations of the Rust Programming Language – Technical\nappendix and Coq development”. In: (2017).url:https://plv.mpi-sws.org/rustbelt/popl18/.\n[10] Steve Klabnik and Carol Nichols with contributions from the Rust Community.The Rust Programming\nLanguage.url:https://doc.rust-lang.org/book/title-page.html.\n[11] Yusuke Matsushita, Takeshi Tsukada, and Naoki Kobayashi. “RustHorn: CHC-Based Verification for\nRust Programs”. In:Programming Languages and Systems. Springer International Publishing, 2020,\npp. 484–514.doi:10.1007/978-3-030-44914-8_18.url:https://doi.org/10.1007%2F978-3-030-\n44914-8_18.\n[12] Contributions from the Rust Community.The Rustonomicon.url:https://doc.rust-lang.org/\nnomicon.\n[13] Aaron Weiss et al.Oxide: The Essence of Rust. 2019.doi:10.48550/ARXIV.1903.00982.url:https:\n//arxiv.org/abs/1903.00982.\nA Intended encoding of the RustBelt’s semantic model in VeriFast\nThe examples that have been discussed in this appendix, have been provided by Prof. Bart Jacobs, not by\nNima Rahimi Foroushaani\nThe example that has been shown in Listing 12 is an illustration of our goal for verifying Rust’s safe abstractions\nusing VeriFast. The other example in Listing 13 shows the outcome of our intended translation from the\nexample of Listing 12 to a C program plus required RustBelt’s semantic model rules and predicates.\n20\n\npub struct Cell_i32 {\nvalue: i32\n}\n/*@\npred Cell_i32_nonatomic_borrow_content(l: *i32, t: thread_id)() =\n*l |-> _;\ninterp Cell_i32 {\npred shared(k: lifetime, t: thread_id, l: *i32) = nonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n}\n@*/\nimpl Cell_i32 {\nfn replace(&self, val: i32) -> i32\n//@ req [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ens [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nlet result: i32 = self.value;\nself.value = val;// using unsafe superpower\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\n}\nListing 12: ACellimplementation in Rust with the intended user provided VeriFast’s annotations that are\nrequired for verifying it. This example has been provided by Prof. Bart Jacobs\n21\n\n/*@\n// Lifetime logic\nabstract_type lifetime; // Type of lifetimes\nabstract_type thread_id; // Type of thread IDs\npredicate lifetime(lifetime k;); // Lifetime token\npredicate thread_token(thread_id t); // nonatomic token with Top mask ([NaInv: t.Top] in RustBelt)\npredicate nonatomic_borrow(lifetime k, thread_id t, void *l, predicate() P); // nonatomic borrow with mask Nshr.l\nlemma void open_nonatomic_borrow(lifetime k, thread_id t, void *l, real q); // Rule LftL-na-acc with N = Nshr.l and requiring NaInv: t.Top instead of NaInv: t.N\nrequires nonatomic_borrow(k, t, l, ?P) &*& [q]lifetime(k) &*& thread_token(t);\nensures P() &*& close_nonatomic_borrow_token(P, q, k, t);\npredicate close_nonatomic_borrow_token(predicate() P, real q, lifetime k, thread_id t);\nlemma void close_nonatomic_borrow();\nrequires close_nonatomic_borrow_token(?P, ?q, ?k, ?t) &*& P();\nensures [q]lifetime(k) &*& thread_token(t);\n// Cell type interpretation\npredicate_ctor Cell_i32_nonatomic_borrow_content(void *l, thread_id t)() =\ninteger(l, _);\npredicate Cell_i32_shared(lifetime k, thread_id t, void *l) = // SHR predicate for Cell\nnonatomic_borrow(k, t, l, Cell_i32_nonatomic_borrow_content(l, t));\n@*/\n// fn replace<'a>(self: &'a Cell, val: i32) -> i32\nint replace(int *self, int val)\n//@ requires [?q]lifetime(?a) &*& Cell_i32_shared(a, ?t, self) &*& thread_token(t);\n//@ ensures [q]lifetime(a) &*& thread_token(t);\n{\n//@ open Cell_i32_shared(a, t, self);\n//@ open_nonatomic_borrow(a, t, self, q);\n//@ open Cell_i32_nonatomic_borrow_content(self, t)();\nint result = *self;\n*self = val;\n//@ close Cell_i32_nonatomic_borrow_content(self, t)();\n//@ close_nonatomic_borrow();\nreturn result;\n}\nListing 13: The intended C translation of the example, shown in Listing 12 with the VeriFast’s annotations.\nThe annotations here are the user provided ones in the example shown in Listing 12 plus the ones that our\nintended approach would generate. This example has been provided by Prof. Bart Jacobs\n22", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/2212.12976v1", + "updated": "2022-12-26T00:19:19Z", + "published": "2022-12-26T00:19:19Z", + "title": "Modular Formal Verification of Rust Programs with Unsafe Blocks", + "summary": " Rust is a modern systems programming language whose type system guarantees\nmemory safety. For the sake of expressivity and performance it allows\nprogrammers to relax typing rules temporarily, using unsafe code blocks.\nHowever, in unsafe blocks, the burden of making sure that the code does not end\nup having undefined behaviour is on the programmer. Even most expert\nprogrammers make mistakes and a memory safety bug in an unsafe block renders\nall the type system guarantees void. To address this problem we are trying to\nverify soundness of Rust unsafe code applying our Modular Symbolic Execution\nalgorithm. This text outlines our approach and the progress that has been made\nso far.\n", + "author": [ + { + "name": "Nima Rahimi Foroushaani" + }, + { + "name": "Bart Jacobs" + } + ], + "arxiv:comment": { + "_": "22 pages, 13 listings, 3 figures, Technical report, Appendix by Bart\n Jacobs", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/2212.12976v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/2212.12976v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": [ + { + "$": { + "term": "cs.LO", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + { + "$": { + "term": "cs.PL", + "scheme": "http://arxiv.org/schemas/atom" + } + } + ] + } + }, + "doi_10.1007/978-3-540-71229-9_9": { + "path": [ + "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegister Allocation and Optimal Spill Code\nScheduling in Software Pipelined Loops Using\n0-1 Integer Linear Programming Formulation\nSantosh G. Nagarakatte\n1\nand R. Govindarajan\n1,2\n1\nDepartment of Computer Science and Automation,\n2\nSupercomputer Education and Research Center,\nIndian Institute of Science, Bangalore 560012, India\n{santosh,govind}@csa.iisc.ernet.in\nAbstract.In achieving higher instruction level parallelism, software\npipelining increases the register pressure in the loop. The usefulness of\nthe generated schedule may be restricted to cases where the register\npressure is less than the available number of registers. Spill instructions\nneed to be introduced otherwise. But scheduling these spill instructions\nin the compact schedule is a difficult task. Several heuristics have been\nproposed to schedule spill code. These heuristics may generate more spill\ncode than necessary, and scheduling them may necessitate increasing the\ninitiation interval.\nWe model the problem of register allocation with spill code genera-\ntion and scheduling in software pipelined loops as a 0-1 integer linear\nprogram. The formulation minimizes the increase in initiation interval\n(II) by optimally placing spill code and simultaneously minimizes the\namount of spill code produced. To the best of our knowledge, this is\nthe first integrated formulation for register allocation, optimal spill code\ngeneration and scheduling for software pipelined loops. The proposed\nformulation performs better than the existing heuristics by preventing\nan increase in II in 11.11% of the loops and generating 18.48% less spill\ncode on average among the loops extracted from Perfect Club and SPEC\nbenchmarks with a moderate increase in compilation time.\n1 Introduction\nSoftware pipelining [14] is the most commonly used loop scheduling technique for\nexploiting higher instruction level parallelism. In a software pipelined loop, in-\nstructions from multiple iterations are executed in an overlapped manner. Several\nheuristic methods [2,19] have been proposed to construct a software pipelined\nschedule. In addition a number of methods [10] have also been proposed to find\nan optimal schedule considering resource constraints. A schedule is said to be\noptimal if the initiation interval (II) of the schedule is not greater than that of\nany other schedule for the loop with the given resource constraints.\nSoftware pipelining, like other instruction scheduling techniques, increases the\nregister pressure. A number of heuristic approaches to reduce the register pressure\nS. Krishnamurthi and M. Odersky (Eds.): CC 2007, LNCS 4420, pp. 126–140, 2007.\nc\n\u0002Springer-Verlag Berlin Heidelberg 2007\n\nRegister Allocation and Optimal Spill Code Scheduling127\nof the software pipelined schedule have been proposed [11]. Also, approaches to\nminimize the register pressure of the software pipelined schedule using linear [16]\nand integer linear program formulation have been reported in literature. However,\nthese methods do not guarantee that the register requirements of the constructed\nschedule is less than the available registers. If the register need of the constructed\nschedule is greater than the available number of registers, either spill code needs\nto be introduced or the initiation interval needs to be increased [21]. In order to\ndetermine whether the constructed schedule is feasible for the given number of reg-\nisters, register allocation must be performed with necessary spill code generation.\nFurther the spill code must be scheduled in the compact schedule, without violat-\ning any resource or dependence constraints. Currently heuristic approaches [21]\nhave been proposed for the introduction of spill code. Unfortunately, introduction\nof spill code can saturate the memory units and thereby force an increase in the\ninitiation interval.\nIn this paper, we are interested in addressing the following problem: Given a\nmodulo scheduled loop L, a machine architecture M and an initiation interval II,\nis it possible to perform register allocation with the given registers and optimally\ngenerate and schedule necessary spill code such that the register requirement of\nthe schedule is lesser than or equal to the available number of registers? We\npropose a 0-1 integer linear programming formulation for register allocation,\noptimal spill code generation and spill code placement in software pipelined\nloops. The proposed approach is guaranteed to identify a schedule with necessary\nspill code, whenever such a schedule exists, without increasing the initiation\ninterval. Further the proposed approach generates minimal spill code, thereby\nimproving the code quality. The proposed formulation takes into account both\nthe compactness of the schedule and memory unit usage. Further the formulation\nincorporates live range splitting [4] which allows a live range to be assigned to a\nregister at specific time instances and be resident in memory in rest of the time\ninstances. To the best of our knowledge, this is the first integrated formulation\nfor register allocation, optimal spill code generation and scheduling for software\npipelined loops. The formulation is useful in evaluating various heuristics and\none can generate a better quality code with a moderate increase in compilation\ntime. We have implemented the solution method on loops from Perfect Club and\nSPEC2000 benchmarks. On an average, we prevent an increase in the initiation\ninterval in 11.11% of the 90 loops on an architecture with 32 registers and in\n12% of the 157 loops on an architecture with 16 registers when compared to the\nheuristic approach [21]. We also generate roughly 18.48% less spill code compared\nto the heuristic solution.\nThe paper is organized as follows: Section 2 provides a brief motivation for\noptimal spill code generation and scheduling. In Section 3, we explain our integer\nlinear programming formulation. Section 4 presents the simplified formulation.\nSection 5 presents the experimental methodology andresults.InSection6,we\ndiscuss the related work and concluding remarks are provided in Section 7.\n\n128S.G. Nagarakatte and R. Govindarajan\n2 Motivation\nTraditionally, the process of adding spill code is done iteratively [21] for architec-\ntures with no rotating registers. First, the loop is modulo scheduled, then register\nallocation is performed. If the register pressure of the schedule is greater than\nthe available number of registers, then spill candidates are chosen. Subsequently\nspill code is added and the loop is rescheduled. In the process above, since the\nselection of spill candidates is based on acertain heuristic, it may result either\nin the addition of extra spill code or the introduction of spill code at a time step\nwhere no memory unit is available. These, in turn, may increase the memory\nunit usage necessitating an increase in the initiation interval. Various heuristics\nhave been proposed for generating spill code and scheduling spill code [1].\nCritical cycleis one of the key characteristicsused by heuristics to decide on\nthe spill candidates. A time steptis said to be aCritical cyclein the kernel if\nthe number of live ranges at that instant is greater than the number of available\nregisters. In Figure 1(a), we show the live ranges of a software pipelined schedule\nwithII= 6 and assume there are four registers available. For this schedule,\ncycle 2 is the critical cycle. To performregister allocation with the available\nfour registers for the given schedule, one of the live ranges must be spilled. A\ncommonly used heuristic gives priority to the spill candidate with longest live\nrange [21]. Unfortunately, it is possible that the longest live range does not span\nthrough critical cycle. Hence, spilling the longest live range may not necessarily\nreduce the register pressure. A refined heuristic considering the above prioritizes\nthe spill candidate which is live at the critical cycle and has the longest lifetime\namong the the spill candidates [21]. The heuristics may not be able to capture\nall the scenarios.\nused\n0\n1\n0\n0\n0\n1\nTime \nSlot\n A\nBC DE\nMem units\n0\n1\n2\n3\n4\n5\nX\nO\nO\nX\nX\nO\nX\nO\nO\nO\nX\n(a) Initial Schedule\n1\n1\n1\n0\n0\n1\n A\nBC D E\n0\n1\nMem units\nused\nTime \nSlot\n2\n3\n4\n5X\nload\nX\nO\nX\nX\nOO\nX\nO\nO\nO\nstore\n(b) Final Schedule\nFig. 1.Initial kernel with II = 6. X is the definition and O is the use of the live range.\nConsider the kernel shown in Figure 1(a). In this example, we have assumed a\nload and a store latency of 1 cycle and the presence of a single memory unit and\n4 registers. The memory unit usage in the kernel is indicated in the figure. The\nkernel is obtained for an initiation interval of 6. The register need of the schedule\n\nRegister Allocation and Optimal Spill Code Scheduling129\nis 5. So we need to insert spills in order to reduce register need. Figure 1(b) shows\nthe kernel after the spill code has been scheduled. Among the spill candidates,\nvariables D and E have the longest live range and pass through the critical cycle\n2. In the kernel in Figure 1(b), though the spill store for E is scheduled at cycle\n0, the value in the register continues and ends only at cycle 1. If we had chosen\nD as the spill candidate, we would not have been able to spill and hence reduce\nthe register pressure at cycle 2. This is because of the use of D in cycle 2. As\na result, it is not only necessary to select the right spill candidate but also to\nschedule the spill loads and stores so that the register need of the loop is reduced\nwithout unnecessarily requiring an increase in the initiation interval.\nThe recent work in spill code generation [21] addresses the iterative process of\nadding spill code by selecting a finite number of candidates for spilling based on\naquantity factorwhich is determined experimentally. By adopting the notion of\nquantity factor, we are making the decision of selecting the spill candidate and\nscheduling them incrementally, considering a few candidates. It is possible that\nthe greedy approach can fail. In our experimentation, the quantity factor of 0.5\nresulted in an increase in the initiation interval in 12% of the loops that had\nsufficent register pressure and needed the addition of spill code.\nMoreover, there are a plethora of factors that need to beconsidered while\nchoosing the right spill candidate which can be suitably scheduled with a min-\nimal amount of spill code. An injudicious selection and subsequent scheduling\ncan result in an unnecessary increase inthe initiation interval, which can be\nattributed to addition of otherwise superfluous spill code saturating the memory\nusage.\n3 ILP Formulation for Spill Code Minimization and\nScheduling\nIn this section, we explain our 0-1 integer linear programming formulation for\nregister allocation and spill code scheduling in software pipelined loops assum-\ning a load-store architecture with no rotating registers. A solution to the ILP\nformulation would represent a valid schedule with spill code suitably sched-\nuled satisfying the register and functional resource constraints. Given a software\npipelined loop with modulo variable expansion [14] carried out, our efficient reg-\nister allocation and spill code scheduling formulation involves the association\nof decision variables to the live range, formulation of relationship between the\ndecision variables that need to be satisfied, solving the integer linear program\nand rewriting the original code.\n3.1 Generation of Decision Variables\nGiven a data dependence graph and a periodic schedule, we model a live range\nwith a set of decision variables. The live range produced by instructioniis\ndenoted by the temporary nameTN\ni\n. Without the loss of generality, we use\nthe term temporary variable and live range interchangeably as each temporary\n\n130S.G. Nagarakatte and R. Govindarajan\nvariable has exactly one definition point. The live rangeTN\ni\nis represented with\na series of liveness decision variables from its definition time (T\ndef\ni\n)toitslast\nuse time (T\nend\ni\n). A live range can be allocated to any of the R registers. Hence\ncorresponding to each time instantt∈[T\ndef\ni\n,T\nend\ni\n]andregisterr,wecreate\nliveness decision variables of the formTN\ni,r,t\n. The decision variableTN\ni,r,t\n=1\nrepresents the fact that theTN\ni\nis allocated to registerrat time instantt.\nTo determine where to introduce spill stores and loads in the schedule, we\nintroduce two kinds of spill decision variables namely store decision and load\ndecision variables.\n1. Store decision variable: We introduce store decision variablesSTN\ni,r,t\nfor\nevery live rangeTN\ni\n, for register r and time t. The store decision variable\nSTN\ni,r,t\n= 1 implies that there is a spill store of the live rangeTN\ni\nin\nregisterrat time instantt. The store decision variable is defined only for\na subset of the time steps in the kernel. More specifically, it is defined only\nfor time stept∈[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nstore\n\u0004lat\nload\n]wherelat\ni\n,lat\nstore\nandlat\nload\nare latencies ofinstructioni, store and load respectively. This\nis because the spill store can be scheduled only afterT\ndef\ni\n⊕lat\ni\n.Further\nthe spill store must be scheduledlat\nstore\n+lat\nload\ncycles before the last\nuse. Since all time steps should be within [0, II−1], the add and subtract\noperations are performed modulo II and represented as⊕and\u0004respectively.\nThe store decision variableSTN\ni,r,t\nis defined for time stepst∈storeset(i)\nwherestoreset(i)=[T\ndef\ni\n⊕lat\ni\n,T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n].\n2. Load decision variable: We introduce load decision variableLT N\ni,r,t\nfor\nevery live rangeTN\ni\n,registerr,andtimestept. The load decision vari-\nableLT N\ni,r,t\n= 1 implies that there is a spill load of the live rangeTN\ni\nscheduled at time instantt. The load decision variableLT N\ni,r,t\nis defined\nfor time stepst∈loadset(i)whereloadset(i)=[T\ndef\ni\n⊕lat\ni\n⊕lat\nstore\n,\nT\nend\ni\n\u0004lat\nload\n].\nWe illustrate the introduction of live range and spill decision variables with a\nspecific example in Figure 2. An instruction which defines the value of a tem-\nporary variableTN\n1\nis scheduled at time 0. The last use ofTN\n1\nis scheduled\nat time 9. The liveness, spill load and store decision variables introduced corre-\nsponding to register R0 are shown in Figure 2. In this example, the latency of\nthe instruction producing the live rangeTN\n1\nis 1, and that of store or load is 2.\nTo represent whether the live rangeTN\n1\nis live in register R0 at various time\nsteps during its live range, we use decision variablesTN\n1,0,0\n,... TN\n1,0,9\n.The\nstore decision variables are defined for time steps [1, 5]. We do not define the\nstore decision variable at time instant 0 since it is the definition time. Similarly\nthe store decision variable is not defined for time steps [6, 9] as splitting the live\nrange beyond time step 5 does not result in a meaningful spill load to be sched-\nuled before the last use ofTN\n1\n. Similarly we do not create spill load decision\nvariables at time steps [0, 2], since spill store would not have completed by that\ntime, and at time steps [8, 9], as the spill load would not complete before the\nlast use at 9.\n\nRegister Allocation and Optimal Spill Code Scheduling131\n1\n2\n3\n4\n5\n6\n7\n8\n9\nTime\n0\nDecision variables for \n=\n \nregister R0\nTN\n1\n=\n.. op TN\n1\n=.. op TN\n1\nTN\n1,0,0\nTN\n1,0,1\nSTN\n1,0,1\nTN\n1,0,2\nSTN\n1,0,2\nTN\n1,0,3\nSTN\n1,0,3\nLTN\n1,0,3\nTN\n1,0,4\nSTN\n1,0,4\nLTN\n1,0,4\nTN\n1,0,5\nSTN\n1,0,5\nLTN\n1,0,5\nTN\n1,0,6\nLTN\n1,0,6\nTN\n1,0,7\nLTN\n1,0,7\nTN\n1,0,8\nTN\n1,0,9\nFig. 2.Decision variables associated with live rangeTN\n1\nand register 0 with an II=10\n3.2 Constraints\nHaving discussed the liveness, spill store and spill load decision variables cor-\nresponding to each time instant and register, we now explain how register al-\nlocation and spill code scheduling can be formulated using a set of constraints.\nSatisfaction of these constraints results in a schedule with valid register alloca-\ntion and appropriate spill code placement.\nMust-Allocate Definition Constraint:The Must-Allocate Definition Con-\nstraints ensure that a register is allocated to a live range when the live range is\ndefined. That is, for each instruction that produces a value, a register must be\nallocated to the live range. IfIis the set of instructions that produce a result\nvalue andTN\ni\nbe the temporary variable corresponding to instructioni∈I,the\nfollowing must-allocate definition constraint must be satisfied.\n∑\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(1)\nThere are exactly|I|constraints produced by the above equation. For the ex-\nample shown in Figure 2, corresponding toTN\n1\n, the following must-allocate\ndefinition constraint must be satisfied.\n∑\nr∈R\nTN\n1,r,0\n=1\nMust-Allocate Use Constraint:Must-Allocate Use Constraints ensure that\na live range is in a register at the time instant where there is an use. Let use(TN\ni\n)\nrepresent the set of instructions that use the temporary variableTN\ni\nproduced\n\n132S.G. Nagarakatte and R. Govindarajan\nby instructioni. The live rangeTN\ni\nmust be available in a register at time\ninstanttcorresponding to its use since we assume a load-store architecture.\nFor each instruction j∈use(TN\ni\n), scheduled at time instantt,\n∑\nr∈R\nTN\ni,r,t\n−\n∑\nr,t\n′\nLT N\ni,r,t\n′\n≥1for all t=T\ndef\nj\nand j∈use(TN\ni\n)(2)\nwheret\n\u0004\n∈(t\u0004lat\nload\n,t]. There are exactly\n∑\ni∈I\n|use(TN\ni\n)|constraints cor-\nresponding to the above equation. We refer to these as must-allocate use con-\nstraints.\nFor the example shown in Figure 2, corresponding toTN\n1\n, the following must-\nallocate use constraints must be satisfied.\n∑\nr∈R\nTN\n1,r,5\n−\n∑\nr∈R\n(LT N\n1,r,4\n+LT N\n1,r,5\n)≥1;\n∑\nr∈R\nTN\n1,r,9\n≥1\nAt-most Single Store Constraints:The live rangeTN\ni\nneed to be stored at-\nmost once. For every instructioni∈I, at-most one store constraint is given by\n∑\nt\n∑\nr∈R\nSTN\ni,r,t\n≤1(3)\nwhere t is in the range [(T\ndef\ni\n⊕lat\ni\n), (T\nend\ni\n\u0004lat\nload\n\u0004lat\nstore\n)].\nAs the objective minimizes the spill loads and stores, this constraint is re-\ndundant. However, this constraint reduced the solution time taken by the ILP\nsolver.\nStore Before Load Constraints:A spill load can be scheduled for a live\nrange provided there is an earlier spill store for that temporary name. At every\ntime instant where a spill load is possible, there must be a store which has\nbeen scheduled earlier. For every spill load corresponding to live rangeTN\ni\n,the\nfollowing constraints must be satisfied.\n∑\nr\nLT N\ni,r,t\n≤\n∑\nr\n∑\nt\n′\nSTN\ni,r,t\n′\n∀t∈loadset(i)(4)\nwheret\n\u0004\nis in the range [(T\ndef\ni\n⊕lat\ni\n), (t\u0004lat\nstore\n)]. There are exactly\n|loadset(i)|such constraints for eachTN\ni\nIn Figure 2, each of the spill loads corresponding to time steps [3, 7] must\nsatisfy the following constraints. We have assumed a store latency of 2.\n∑\nr∈R\nLT N\n1,r,3\n≤\n∑\nr∈R\nSTN\n1,r,1\n∑\nr∈R\nLT N\n1,r,4\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n)\n\nRegister Allocation and Optimal Spill Code Scheduling133\n∑\nr∈R\nLT N\n1,r,5\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n)\n∑\nr∈R\nLT N\n1,r,6\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n)\n∑\nr∈R\nLT N\n1,r,7\n≤\n∑\nr∈R\n(STN\n1,r,1\n+STN\n1,r,2\n+STN\n1,r,3\n+STN\n1,r,4\n+STN\n1,r,5\n)\nSpill Load Store Constraints:In order to schedule spill code in the compact\nschedule, we have introduced store and load decision variables at multiple time\ninstants. The following set of constraints ensure that there are no unnecessary\nspill code instructions and formulation generated schedule is valid.\nAt each time instanttfor any live range, ift∈loadset(i)andt∈storeset(i),\nthen the store before load and at-most only one store constraints ensure that\nboth load and store cannot be scheduled att. For each store decision variable at\ntimetcorresponding to live rangeTN\ni\n, a store can actually take place at that\ninstant only if the variable is in the register.\nSTN\ni,r,t\n≤TN\ni,r,t\n∀r∈Rand∀t∈storeset(i)(5)\nIn Figure 2, the following constraints corresponding to store of live rangeTN\n1\nin register 0, at time steps [1, 5] must be satisfied.\nSTN\n1,0,1\n≤TN\n1,0,1\n;STN\n1,0,2\n≤TN\n1,0,2\n;STN\n1,0,3\n≤TN\n1,0,3\n;\nSTN\n1,0,4\n≤TN\n1,0,4\n;STN\n1,0,5\n≤TN\n1,0,5\n;\nAfter a spill store, the live range in a register may continue to exist or cease\nto exist. But if there is a load in the subsequent time instant, then the load\nconstraints can bring the live range back into existence in the register. If a spill\nstore is possible for live rangeTN\ni\nat time instanttand spill load is not possible\nat time instantt+ 1, then the following constraints need to be satisfied.\nTN\ni,r,t⊕1\n≤TN\ni,r,t\n∀r∈R, f or all t∈storeset(i)and t⊕1/∈loadset(i)(6)\nIn Figure 2, the following constraints must be satisfied corresponding to the\nlive rangeTN\n1\nat time instant 1\nTN\n1,0,2\n≤TN\n1,0,1\nThe spill load brings back the live range into the register. There is no necessity\nof a spill load for any live rangeTN\ni\ncorresponding to registerrif the live range\nis already in the registerr. Further, a temporary name is live in a registerrat\ntimeteither if it was live at time stept\u00041 or if a spill load is scheduled in\ntime stept. For a spill load at time instantt, the following constraints need to\nbe satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n+LT N\ni,r,t\n∀r∈R,∀t∈loadset(i)(7)\n\n134S.G. Nagarakatte and R. Govindarajan\nIn Figure 2, the spill loads at time steps [3, 7] in register 0 must satisfy the\nfollowing constraints.\nTN\n1,0,3\n≤TN\n1,0,2\n+LT N\n1,0,3\n;TN\n1,0,4\n≤TN\n1,0,3\n+LT N\n1,0,4\nTN\n1,0,5\n≤TN\n1,0,4\n+LT N\n1,0,5\n;TN\n1,0,6\n≤TN\n1,0,5\n+LT N\n1,0,6\nTN\n1,0,7\n≤TN\n1,0,6\n+LT N\n1,0,7\nIf a spill load is not possible at time instantt, i.e t/∈loadset(i) and a spill store\nis not possible at time instantt\u00041, i.e t\u00041/∈storeset(i), then the following\ncontinuation constraints must be satisfied.\nTN\ni,r,t\n≤TN\ni,r,t\u00061\n∀r∈R, f or all t /∈loadset(i)∧t\u00041/∈storeset(i)(8)\nIn Figure 2, the continuation constraints corresponding to time instants 1, 8 and\n9 for register 0 and live rangeTN\ni\nare\nTN\n1,0,1\n≤TN\n1,0,0\n;TN\n1,0,8\n≤TN\n1,0,7\n;TN\n1,0,9\n≤TN\n1,0,8\nInterference Constraints:It is important to ensure that the same register is\nnot allocated to multiple live ranges. Interference constraints ensure that at any\ninstant of time, a register holds a single live range. It is sufficient to ensure that\nafter each live range definition, the register holds a single live range. At time\ninstant t which is the definition time of live rangeTN\ni\n, the following constraints\nmust be satisfied for each registerr\n∑\nj\nTN\nj,r,t\n≤1(9)\nwhereTN\nj,r,t\n=0fort/∈[T\ndef\nj\n,T\nend\nj\n].\nFunctional Unit Constraints:The spill loads and store generated require\nmemory functional units. Thus a spill load or a store can be scheduled at a\nparticular instanttprovided there is a free memory unit available. Hence for\nscheduling spill loads or stores, the following memory unit constraints need to\nbe satisfied for each time slot t’∈[0, II-1].\n∑\ni,r\nLT N\ni,r,t\n+\n∑\nj,r\nSTN\nj,r,t\n≤Mforallt∈[0,II−1](10)\nTN\ni\nis the live range witht∈loadset(i) andTN\nj\nis the live range witht∈\nstoreset(j).Mis the number of memory units available for spill loads and stores\nafter the memory requirements of instructions that are scheduled at time instant\ntin the kernel are satisfied. The above constraint ensures that sum of all spill\nloads and stores scheduled at any time instanttin the kernel is lesser than or\nequal to the number of free memory units available.\n\nRegister Allocation and Optimal Spill Code Scheduling135\n3.3 Objective Function\nThe objective function is to minimize the number of spill loads and stores.\nMinimize:\n∑\ni,r,t\n(STN\ni,r,t\n+LT N\ni,r,t\n)(11)\n4 Simplified Formulation\nThe previous formulation can be simplified by omitting therindices from the\nspill load and store decision variables. In this formulation, we decide whether a\nspill load or a store is necessary at a given time step without considering which\nregister the store or load should use. The constraints are suitably modified to\nreflect the same. The register used by the spill store and loads can be easily\ninferred from theTN\ni,r,t\nvariables as a post-processing step. The simplified for-\nmulation is given below:\nMinimize\n\u0000\ni,t\n(STN\ni,t\n+LT N\ni,t\n)\n\u0000\nr∈R\nTN\ni,r,t\n=1∀i∈Iandt=T\ndef\ni\n(12)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nt\n′\nLT N\ni,t\n′\n≥1∀t=T\ndef\nj\nand(13)\nj∈use(TN\ni\n)\nt\n\u0003\n∈(t\u0005lat\nload\n,t]\nLT N\ni,t\n−\n\u0000\nt”\nSTN\ni,t”\n≤0∀t∈loadset(i)∀i(14)\nt”∈[T\ndef\ni\n+lat\ni\n,t\u0005lat\nstore\n]\nSTN\ni,t\n−\n\u0000\nr\nTN\ni,r,t\n≤0∀t∈storeset(i)∀i(15)\nTN\ni,r,t\n−TN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(16)\n\u0000\nr\nTN\ni,r,t\n−\n\u0000\nr\nTN\ni,r,t\u00041\n−LT N\ni,t\n≤0∀t∈loadset(i)∀i(17)\n\u0000\nj\nTN\nj,r,t\n≤1∀t∈[0,II−1]∀r(18)\n\u0000\ni\nLT N\ni,t\n+\n\u0000\nj\nSTN\nj,t\n≤M∀t∈[0,II−1](19)\nTN\ni,r,t⊕1\n−TN\ni,r,t\n≤0∀t⊕1/∈loadset(i)∀i∀r(20)\nEquation 17 ensures that each spill load loads the live range in at-most one reg-\nister.\n\n136S.G. Nagarakatte and R. Govindarajan\n5 Experimental Evaluation\n5.1 Experimental Methodology\nWe have used the SUIF [12] as the compiler front end for the benchmarks. For\nthe compiler back end, we have used Trimaran [13] compilation and simulation\nenvironment for VLIW architectures. The data dependence graphs are generated\nusing the Trimaran’s back end . The initial modulo schedule is obtained using\nan integer linear program formulation [10]. The machine architecture used in\nthe formulation is a load-store architecture with 3 memory units, 3 integer units\nand 4 floating point units. For the constructed schedule, modulo variable expan-\nsion [14] is performed to ensure that no live range is longer than II. We then\ngenerate the formulation proposed in this paper to perform register allocation\nand necessary spill code generation and scheduling. We have considered archi-\ntectures with 16 and 32 registers. The integer linear programming formulation\nis solved using the CPLEX 9.0 solver [5] running on a Pentium 4, operating at\n3.06 GHz with 4 GB RAM. A CPU-time limit of 600 seconds is used for solving\nour integer linear program. The loops in which the integer linear program timed\nout are not considered for evaluation.\n5.2 Results\nWe compare our approach with the best performing heuristic [21], viz spilling\nuses, with a quantity factor of 0.5 and a traffic factor of 0.3. The quantity factor\nis used for deciding the number of spill candidates and traffic factor is used for\nthe selection of spill candidates.We refer to the above heuristic asSUand our\nformulation asILP.\nSpill Code.The amount of spill code introduced impacts the code quality of\nthe schedule. We evaluated the amount of spill code generated byILPandSU.\nIn this result, we do not consider amount of spill code generated with the loops\nrequiring an increase in II withSUas it is not fair to compare schedules with\nTable 1.Spill code and prevention of II increase with 32 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise25129612321.9518.33\n179.art4015465719.316.67\n183.equake429445316.98111.11\n188.ammp4614566311.11214.29\n200.sixtrack469708416.67111.11\nPerfect Club693119123719.41412.9\nTotal2689050361718.481011.11\n\nRegister Allocation and Optimal Spill Code Scheduling137\nTable 2.Spill code and prevention of II increase with 16 registers\n#loopsTotal%decrease#loops%loops\nBenchmark#loopswith regspill codein spillwithout IIwithout II\npressureILPSUcode(ILP)increase(ILP)increase(ILP)\n168.wupwise251912815215.7900\n179.art40268510619.8113.85\n183.equake42198810415.38421.05\n188.ammp462188957.3729.52\n200.sixtrack462311213114.50313.04\nPerfect Club69493133469.54918.37\nTotal26815781493412.851912.10\ndifferent initiation intervals. Table 1 and Table 2 report the amount of spill gen-\nerated for an architecture with 32 and 16 registers respectively. Though number\nof loops with higher register pressure (greater than the available registers) is\nsmall, we find that there is fairly large spill code being generated. The amount\nof spill code reduction withILPwhen compared toSUranges from 11.11% to\n21.95% for 32 registers and it ranges from 7.37% to 19.81% for 16 registers. On\nan averageILPproduces 18.48% less spill code on an average for an architecture\nwith 32 registers and 12.85% less spill code on an average for an architecture\nwith 16 registers.\nInitiation Interval.The throughput of a software pipelined loop is measured\nin terms of the initiation interval. Table 1 and Table 2 report the number of\nloops requiring an increase in the initiation interval inSUand do not require\nan increase in II while usingILP.ILPeliminates the need for an increase in II\nwhen compared toSUin 6.67% to 14.29% of the loops in various benchmarks.\nOn an average,ILPeliminates an increase in II in 11% of the loops for an\narchitecture with 32 registers and 12% of the loops for 16 registers.\n(a) 16 registers(b) 32 registers\nFig. 3.Solution time taken by ILP\n\n138S.G. Nagarakatte and R. Govindarajan\nIn summary, we observe that our ILP approach is able to reduce the amount\nof spill code by 18.48% and eliminate an increase in II by 11.11% on average\namong 90 loops on an architecture with 32 registers.\nSolution Time.In Figure 3(a) and Figure 3(b), we report the time taken by\nthe ILP, where the X-axis represents the time taken and Y-axis, the number of\nloops for which the solution can be found with the given time. For example, for\nthe case of 16 registers, 136 out of 268 loops take less than one second each. The\narithmetic mean of the time taken by ILP for each loop is 18.44 seconds in the\ncase of 16 registers and is 77.79 seconds in the case of 32 registers.\n6 Related Work\nSoftware pipelining has been extensively studied and few of the contributions\nin this area are in [6,7,14,17,19]. A comprehensive survey is available in [2]. A\nconsiderable amount of work has been doneto minimize the register requirements\nof the the software pipeline schedule. Among these, Huff [11] uses slack scheduling\nand tries to minimize the combined register pressure. In [8], ILP formulation for\ngenerating the schedule has been proposed and minimization of the number of\nbuffers required in such a scenario is addressed in [10]. A number of modulo\nscheduling heuristics that reduce the register pressure and generate schedules\nwith smallest number of registers have been proposed in [15]. All these do not\nconsider the dual problem of scheduling with a given number of registers.\nRegister allocation for software pipelined loops was proposed by Rau et al. [18].\nThey consider an architecture that incorporates rotating registers. However spill\ncode generation and scheduling was not considered. Ning et al. [16] have pro-\nposed an algorithmic framework for concurrent scheduling and register alloca-\ntion. Their approach estimates the register requirement with the help of buffers.\nZalamea et al. [21] have described methods for generating spill code when the\nregister pressure is greater than the number of registers. But they did not con-\nsider register allocation and introduction of spill code was based on heuristics.\nGoodwin et al. [9] have proposed a 0-1 integer linear programming formula-\ntion for global register allocation. Our model inherits certain ideas from their\napproach. They do not consider register allocation for software pipelined loops\nand hence does not deal with the problem of spill code scheduling in a cyclic\nschedule. Methods for generating spill code on-the-fly using heuristics have been\nproposed in [1]. Since the generation of spill code is based on heuristics, solution\nmay not always be optimal.\nInteger linear programming formulations for instruction scheduling have been\nproposed by Chang [3] and Wilken [20]. In [3], the authors consider instruction\nscheduling and spill code generation. However, they do not perform register al-\nlocation and their technique does not guarantee optimal spill code. They also\ndo not address the problem of scheduling the generated spill code in a compact\n\nRegister Allocation and Optimal Spill Code Scheduling139\ncyclic schedule. Our work, for the first time proposes an integrated formulation\nfor register allocation, optimal spill code generation and scheduling in software\npipelined schedules.\n7 Conclusions\nThe paper presents an optimal method for integrated register allocation and\nspill code scheduling in software pipelined loops, using a 0-1 integer linear pro-\ngramming formulation. We formulate it as an integer linear program because\nthe selection of a spill candidate based on a certain heuristic can generate ex-\ntraneous spill code, which in turn may necessitate an increase in the initiation\ninterval. The formulation serves as a framework with which various heuristics\ncan be evaluated. Experiments show that our formulation outperforms the best\nperforming heuristic proposed in [21]\n–By eliminating an increase in the initiation interval in 11.11% of the 90 loops\nthat had sufficient register pressure for an architecture with 32 registers and\nin 12% of the cases with 157 loops on a machine with 16 registers.\n–By generating on an average, 18.48% less spill code for an architecture with\n32 registers and 12.85 % less spill code for an architecture with 16 registers.\nAcknowledgments\nThe authors are thankful to the members of the High Performance Comput-\ning Laboratory for their useful comments and discussions. The authors are also\nthankful to the anonymous reviewer for suggesting the simplified formulation.\nThe first author acknowledges the partial support provided by the Philips re-\nsearch fellowship.\nReferences\n1. Alex Aleta, Josep M. Codina, Antonio Gonzalez, and David Kaeli. Demystifying\non-the-fly spill code.SIGPLAN Not., 40(6):180–189, 2005.\n2. Vicki H. Allan, Reese B. Jones, Randall M. Lee, and Stephen J. Allan. Software\npipelining.ACM Comput. Surv., 27(3):367–432, 1995.\n3. C.M Chen C.M Chang and C.T King. Using integer linear programming for in-\nstruction scheduling and register allocation in multi-issue processors.Computers\nand Mathematics with Applications, 34(9):1–14, 1997.\n4. Keith D. Cooper and L. Taylor Simpson. Live range splitting in a graph coloring\nregister allocator. InCC ’98: Proceedings of the 7th International Conference on\nCompiler Construction, pages 174–187, London, UK, 1998. Springer-Verlag.\n5. ILOG CPLEX:. http://www.ilog.com.\n6. James C. Dehnert and Ross A. Towle. Compiling for the cydra 5.J. Supercomput.,\n7(1-2):181–227, 1993.\n7. Kemal Ebcioglu and Alexandru Nicolau. A global resource-constrained paralleliza-\ntion technique. InICS ’89: Proceedings of the 3rd international conference on\nSupercomputing, pages 154–163, New York, NY, USA, 1989. ACM Press.\n\n140S.G. Nagarakatte and R. Govindarajan\n8. Paul Feautrier. Fine-grain scheduling under resource constraints. InLCPC ’94:\nProceedings of the 7th International Workshop on Languages and Compilers for\nParallel Computing, pages 1–15, London, UK, 1995. Springer-Verlag.\n9. David W. Goodwin and Kent D. Wilken. Optimal and near-optimal global register\nallocations using 0-1 integer programming.Softw. Pract. Exper., 26(8):929–965,\n1996.\n10. R. Govindarajan, Erik R. Altman, and Guang R. Gao. A framework for resource-\nconstrained rate-optimal software pipelining.IEEE Transactions on Parallel and\nDistributed Systems, 07(11):1133–1149, 1996.\n11. Richard A. Huff. Lifetime-sensitive modulo scheduling. InSIGPLAN Conference\non Programming Language Design and Implementation, pages 258–267, 1993.\n12. SUIF Compiler Infrastructure. http://suif.stanford.edu/suif/.\n13. Trimaran: An infrastructure for research in instruction level parallelism.\nhttp://www.trimaran.org.\n14. M. Lam. Software pipelining: an effective scheduling technique for vliw machines.\nInPLDI ’88: Proceedings of the ACM SIGPLAN1988 conference on Programming\nLanguage design and Implementation, pages 318–328, New York, NY, USA, 1988.\nACM Press.\n15. Josep Llosa, Mateo Valero, and Eduard Ayguade.Heuristics for register-\nconstrained software pipelining. InMICRO 29: Proceedings of the 29th annual\nACM/IEEE international symposium on Microarchitecture, pages 250–261, Wash-\nington, DC, USA, 1996. IEEE Computer Society.\n16. Qi Ning and Guang R. Gao. A novel framework of register allocation for soft-\nware pipelining. InConference Record of the Twentieth Annual ACM SIGPLAN-\nSIGACT Symposium on Principles of Programming Languages, pages 29–42,\nCharleston, South Carolina, 1993.\n17. B. R. Rau and C. D. Glaeser. Some scheduling techniques and an easily schedulable\nhorizontal architecture for high performance scientific computing. InMICRO 14:\nProceedings of the 14th annual workshop on Microprogramming, pages 183–198,\nPiscataway, NJ, USA, 1981. IEEE Press.\n18. B. R. Rau, M. Lee, P. P. Tirumalai, and M. S. Schlansker. Register allocation for\nsoftware pipelined loops.SIGPLAN Not., 27(7):283–299, 1992.\n19. B. Ramakrishna Rau. Iterative modulo scheduling: an algorithm for software\npipelining loops. InMICRO 27: Proceedings of the 27th annual international sym-\nposium on Microarchitecture, pages 63–74, New York, NY, USA, 1994. ACM Press.\n20. Kent Wilken, Jack Liu, and Mark Heffernan. Optimal instruction scheduling us-\ning integer programming. InPLDI ’00: Proceedings of the ACM SIGPLAN2000\nconference on Programming language design and implementation, pages 121–133,\nNew York, NY, USA, 2000. ACM Press.\n21. Javier Zalamea, Josep Llosa, Eduard Ayguade, and Mateo Valero. Improved spill\ncode generation for software pipelined loops. InPLDI ’00: Proceedings of the ACM\nSIGPLAN 2000 conference on Programming language design and implementation,\npages 134–144, New York, NY, USA, 2000. ACM Press.", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 23 + ] + ], + "date-time": "2024-01-23T20:08:48Z", + "timestamp": 1706040528010 + }, + "publisher-location": "Berlin, Heidelberg", + "reference-count": 21, + "publisher": "Springer Berlin Heidelberg", + "isbn-type": [ + { + "value": "9783540712282", + "type": "print" + }, + { + "value": "9783540712299", + "type": "electronic" + } + ], + "content-domain": { + "domain": [], + "crossmark-restriction": false + }, + "DOI": "10.1007/978-3-540-71229-9_9", + "type": "book-chapter", + "created": { + "date-parts": [ + [ + 2007, + 7, + 1 + ] + ], + "date-time": "2007-07-01T17:39:13Z", + "timestamp": 1183311553000 + }, + "page": "126-140", + "source": "Crossref", + "is-referenced-by-count": 11, + "title": "Register Allocation and Optimal Spill Code Scheduling in Software Pipelined Loops Using 0-1 Integer Linear Programming Formulation", + "prefix": "10.1007", + "author": [ + { + "given": "Santosh G.", + "family": "Nagarakatte", + "sequence": "first", + "affiliation": [] + }, + { + "given": "R.", + "family": "Govindarajan", + "sequence": "additional", + "affiliation": [] + } + ], + "member": "297", + "reference": [ + { + "issue": "6", + "key": "9_CR1", + "doi-asserted-by": "publisher", + "first-page": "180", + "DOI": "10.1145/1064978.1065032", + "volume": "40", + "author": "A. Aleta", + "year": "2005", + "unstructured": "Aleta, A., et al.: Demystifying on-the-fly spill code. SIGPLAN Not. 40(6), 180–189 (2005), doi:10.1145/1064978.1065032", + "journal-title": "SIGPLAN Not." + }, + { + "issue": "3", + "key": "9_CR2", + "doi-asserted-by": "publisher", + "first-page": "367", + "DOI": "10.1145/212094.212131", + "volume": "27", + "author": "V.H. Allan", + "year": "1995", + "unstructured": "Allan, V.H., et al.: Software pipelining. ACM Comput. Surv. 27(3), 367–432 (1995)", + "journal-title": "ACM Comput. Surv." + }, + { + "issue": "9", + "key": "9_CR3", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1016/S0898-1221(97)00184-3", + "volume": "34", + "author": "C.M. Chen", + "year": "1997", + "unstructured": "Chen, C.M., Chang, C.M., King, C.T.: Using integer linear programming for instruction scheduling and register allocation in multi-issue processors. Computers and Mathematics with Applications 34(9), 1–14 (1997)", + "journal-title": "Computers and Mathematics with Applications" + }, + { + "key": "9_CR4", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "174", + "DOI": "10.1007/BFb0026430", + "volume-title": "Compiler Construction", + "author": "K.D. Cooper", + "year": "1998", + "unstructured": "Cooper, K.D., Simpson, L.T.: Live range splitting in a graph coloring register allocator. In: Koskimies, K. (ed.) CC 1998 and ETAPS 1998. LNCS, vol. 1383, pp. 174–187. Springer, Heidelberg (1998)" + }, + { + "key": "9_CR5", + "unstructured": "ILOG CPLEX: http://www.ilog.com" + }, + { + "issue": "1-2", + "key": "9_CR6", + "doi-asserted-by": "publisher", + "first-page": "181", + "DOI": "10.1007/BF01205184", + "volume": "7", + "author": "J.C. Dehnert", + "year": "1993", + "unstructured": "Dehnert, J.C., Towle, R.A.: Compiling for the cydra 5. J. Supercomput. 7(1-2), 181–227 (1993)", + "journal-title": "J. Supercomput." + }, + { + "key": "9_CR7", + "doi-asserted-by": "publisher", + "first-page": "154", + "DOI": "10.1145/318789.318807", + "volume-title": "ICS ’89: Proceedings of the 3rd international conference on Supercomputing", + "author": "K. Ebcioglu", + "year": "1989", + "unstructured": "Ebcioglu, K., Nicolau, A.: A global resource-constrained parallelization technique. In: ICS ’89: Proceedings of the 3rd international conference on Supercomputing, Crete, Greece, pp. 154–163. ACM Press, New York (1989), doi:10.1145/318789.318807" + }, + { + "key": "9_CR8", + "series-title": "Lecture Notes in Computer Science", + "doi-asserted-by": "publisher", + "first-page": "1", + "DOI": "10.1007/BFb0025867", + "volume-title": "Languages and Compilers for Parallel Computing", + "author": "P. Feautrier", + "year": "1995", + "unstructured": "Feautrier, P.: Fine-grain scheduling under resource constraints. In: Pingali, K.K., et al. (eds.) LCPC 1994. LNCS, vol. 892, pp. 1–15. Springer, Heidelberg (1995)" + }, + { + "issue": "8", + "key": "9_CR9", + "doi-asserted-by": "publisher", + "first-page": "929", + "DOI": "10.1002/(SICI)1097-024X(199608)26:8<929::AID-SPE40>3.0.CO;2-T", + "volume": "26", + "author": "D.W. Goodwin", + "year": "1996", + "unstructured": "Goodwin, D.W., Wilken, K.D.: Optimal and near-optimal global register allocations using 0-1 integer programming. Softw. Pract. Exper. 26(8), 929–965 (1996)", + "journal-title": "Softw. Pract. Exper." + }, + { + "issue": "11", + "key": "9_CR10", + "doi-asserted-by": "publisher", + "first-page": "1133", + "DOI": "10.1109/71.544355", + "volume": "7", + "author": "R. Govindarajan", + "year": "1996", + "unstructured": "Govindarajan, R., Altman, E.R., Gao, G.R.: A framework for resource-constrained rate-optimal software pipelining. IEEE Transactions on Parallel and Distributed Systems 7(11), 1133–1149 (1996), doi:10.1109/71.544355", + "journal-title": "IEEE Transactions on Parallel and Distributed Systems" + }, + { + "key": "9_CR11", + "doi-asserted-by": "crossref", + "unstructured": "Huff, R.A.: Lifetime-sensitive modulo scheduling. In: SIGPLAN Conference on Programming Language Design and Implementation, pp. 258–267 (1993), citeseer.ist.psu.edu/84558.html", + "DOI": "10.1145/173262.155115" + }, + { + "key": "9_CR12", + "unstructured": "SUIF Compiler Infrastructure, http://suif.stanford.edu/suif/" + }, + { + "key": "9_CR13", + "unstructured": "Trimaran: An infrastructure for research in instruction level parallelism, http://www.trimaran.org" + }, + { + "key": "9_CR14", + "doi-asserted-by": "publisher", + "first-page": "318", + "DOI": "10.1145/53990.54022", + "volume-title": "PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation", + "author": "M. Lam", + "year": "1988", + "unstructured": "Lam, M.: Software pipelining: an effective scheduling technique for vliw machines. In: PLDI ’88: Proceedings of the ACM SIGPLAN 1988 conference on Programming Language design and Implementation, Atlanta, Georgia, United States, pp. 318–328. ACM Press, New York (1988), doi:10.1145/53990.54022" + }, + { + "key": "9_CR15", + "doi-asserted-by": "publisher", + "first-page": "250", + "DOI": "10.1109/MICRO.1996.566466", + "volume-title": "MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture", + "author": "J. Llosa", + "year": "1996", + "unstructured": "Llosa, J., Valero, M., Ayguade, E.: Heuristics for register-constrained software pipelining. In: MICRO 29: Proceedings of the 29th annual ACM/IEEE international symposium on Microarchitecture, Paris, France, pp. 250–261. IEEE Computer Society, Washington (1996)" + }, + { + "key": "9_CR16", + "doi-asserted-by": "crossref", + "first-page": "29", + "DOI": "10.1145/158511.158519", + "volume-title": "Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages", + "author": "Q. Ning", + "year": "1993", + "unstructured": "Ning, Q., Gao, G.R.: A novel framework of register allocation for software pipelining. In: Conference Record of the Twentieth Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, Charleston, South Carolina, pp. 29–42. ACM Press, New York (1993), citeseer.ist.psu.edu/ning93novel.html" + }, + { + "key": "9_CR17", + "first-page": "183", + "volume-title": "MICRO 14: Proceedings of the 14th annual workshop on Microprogramming", + "author": "B.R. Rau", + "year": "1981", + "unstructured": "Rau, B.R., Glaeser, C.D.: Some scheduling techniques and an easily schedulable horizontal architecture for high performance scientific computing. In: MICRO 14: Proceedings of the 14th annual workshop on Microprogramming, Chatham, Massachusetts, United States, pp. 183–198. IEEE Press, Piscataway (1981)" + }, + { + "issue": "7", + "key": "9_CR18", + "doi-asserted-by": "publisher", + "first-page": "283", + "DOI": "10.1145/143103.143141", + "volume": "27", + "author": "B.R. Rau", + "year": "1992", + "unstructured": "Rau, B.R., et al.: Register allocation for software pipelined loops. SIGPLAN Not. 27(7), 283–299 (1992), doi:10.1145/143103.143141", + "journal-title": "SIGPLAN Not." + }, + { + "key": "9_CR19", + "doi-asserted-by": "publisher", + "first-page": "63", + "DOI": "10.1145/192724.192731", + "volume-title": "MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture", + "author": "B.R. Rau", + "year": "1994", + "unstructured": "Rau, B.R.: Iterative modulo scheduling: an algorithm for software pipelining loops. In: MICRO 27: Proceedings of the 27th annual international symposium on Microarchitecture, San Jose, California, United States, pp. 63–74. ACM Press, New York (1994), doi:10.1145/192724.192731" + }, + { + "key": "9_CR20", + "doi-asserted-by": "publisher", + "first-page": "121", + "DOI": "10.1145/349299.349318", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "K. Wilken", + "year": "2000", + "unstructured": "Wilken, K., Liu, J., Heffernan, M.: Optimal instruction scheduling using integer programming. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 121–133. ACM Press, New York (2000), doi:10.1145/349299.349318" + }, + { + "key": "9_CR21", + "doi-asserted-by": "publisher", + "first-page": "134", + "DOI": "10.1145/349299.349319", + "volume-title": "PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation", + "author": "J. Zalamea", + "year": "2000", + "unstructured": "Zalamea, J., et al.: Improved spill code generation for software pipelined loops. In: PLDI ’00: Proceedings of the ACM SIGPLAN 2000 conference on Programming language design and implementation, Vancouver, British Columbia, Canada, pp. 134–144. ACM Press, New York (2000), doi:10.1145/349299.349319" + } + ], + "container-title": "Lecture Notes in Computer Science", + "original-title": [], + "link": [ + { + "URL": "http://link.springer.com/content/pdf/10.1007/978-3-540-71229-9_9.pdf", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2020, + 11, + 19 + ] + ], + "date-time": "2020-11-19T05:17:09Z", + "timestamp": 1605763029000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "http://link.springer.com/10.1007/978-3-540-71229-9_9" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + null + ] + ] + }, + "ISBN": [ + "9783540712282", + "9783540712299" + ], + "references-count": 21, + "URL": "http://dx.doi.org/10.1007/978-3-540-71229-9_9", + "relation": {} + } + }, + "doi_10.1145/512529.512563": { + "path": [ + "cyclone [jendeley doi 10_1145_512529_512563].pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nRegion-Based Memory Management in Cyclone\n∗\nDan GrossmanGreg MorrisettTrevor Jim\n†\nMichael HicksYanling WangJames Cheney\nComputer Science Department\nCornell University\nIthaca, NY 14853\n{danieljg,jgm,mhicks,wangyl,jcheney}@cs.cornell.edu\n†\nAT&T Labs Research\n180 Park Avenue\nFlorham Park, NJ 07932\ntrevor@research.att.com\nABSTRACT\nCyclone is a type-safe programming language derived from\nC. The primary design goal of Cyclone is to let program-\nmers control data representation and memory management\nwithout sacrificing type-safety. In this paper, we focus on\nthe region-based memory management of Cyclone and its\nstatic typing discipline. The design incorporates several ad-\nvancements, including support for region subtyping and a\ncoherent integration with stack allocation and a garbage col-\nlector. To support separate compilation, Cyclone requires\nprogrammers to write some explicit region annotations, but\na combination of default annotations, local type inference,\nand a novel treatment of region effects reduces this burden.\nAs a result, we integrate C idioms in a region-based frame-\nwork. In our experience, porting legacy C to Cyclone has\nrequired altering about 8% of the code; of the changes, only\n6% (of the 8%) were region annotations.\nCategories and Subject Descriptors\nD.3.3 [Programming Languages]: Language Constructs\nand Features—dynamic storage management\nGeneral Terms\nLanguages\n1.INTRODUCTION\nMany software systems, including operating systems, de-\nvice drivers, file servers, and databases require fine-grained\n∗\nThis research was supported in part by Sloan grant BR-\n3734; NSF grant 9875536; AFOSR grants F49620-00-1-\n0198, F49620-01-1-0298, F49620-00-1-0209, and F49620-01-\n1-0312; ONR grant N00014-01-1-0968; and NSF Graduate\nFellowships. Any opinions, findings, and conclusions or rec-\nommendations expressed in this publication are those of the\nauthors and do not reflect the views of these agencies.\nPermission to make digital or hard copies of all or part of this work for\npersonal or classroom use is granted without fee provided that copies are\nnot made or distributed for profit or commercial advantage and that copies\nbear this notice and the full citation on the first page. To copy otherwise, to\nrepublish, to post on servers or to redistribute to lists, requires prior specific\npermission and/or a fee.\nPLDI’02,June 17-19, 2002, Berlin, Germany.\nCopyright 2002 ACM 1-58113-463-0/02/0006 ...\n$5.00.\ncontrol over data representation (e.g., field layout) and re-\nsource management (e.g., memory management). Thede\nfactolanguage for coding such systems is C. However, in\nproviding low-level control, C admits a wide class of danger-\nous — and extremely common — safety violations, such as\nincorrect type casts, buffer overruns, dangling-pointer deref-\nerences, and space leaks. As a result, building large systems\nin C, especially ones including third-party extensions, is per-\nilous. Higher-level, type-safe languages avoid these draw-\nbacks, but in so doing, they often fail to give programmers\nthe control needed in low-level systems. Moreover, porting\nor extending legacy code is often prohibitively expensive.\nTherefore, a safe language at the C level of abstraction, with\nan easy porting path, would be an attractive option.\nToward this end, we have developedCyclone[6, 19], a\nlanguage designed to be very close to C, but also safe. We\nhave written or ported over 110,000 lines of Cyclone code,\nincluding the Cyclone compiler, an extensive library, lexer\nand parser generators, compression utilities, device drivers,\na multimedia distribution overlay network, a web server,\nand many smaller benchmarks. In the process, we identified\nmany common C idioms that are usually safe, but which the\nC type system is too weak to verify. We then augmented the\nlanguage with modern features and types so that program-\nmers can still use the idioms, but have safety guarantees.\nFor example, to reduce the need for type casts, Cyclone\nhas features like parametric polymorphism, subtyping, and\ntagged unions. To prevent bounds violations without mak-\ning hidden data-representation changes, Cyclone has a va-\nriety of pointer types with different compile-time invariants\nand associated run-time checks. Other projects aimed at\nmaking legacy C code safe have addressed these issues with\nsomewhat different approaches, as discussed in Section 7.\nIn this paper, we focus on the most novel aspect of Cy-\nclone: its system for preventing dangling-pointer derefer-\nences and space leaks. The design addresses several seem-\ningly conflicting goals. Specifically, the system is:\n•Sound:Programs never dereference dangling pointers.\n•Static:Dereferencing a dangling pointer is a compile-\ntime error. No run-time checks are needed to deter-\nmine if memory has been deallocated.\n•Convenient:We minimize the need for explicit pro-\ngrammer annotations while supporting many C id-\nioms. In particular, many uses of the addresses of local\nvariables require no modification.\n\n282\n\n•Exposed:Programmers control where objects are allo-\ncated and how long they live. As usual, local variables\nare always allocated on the stack.\n•Comprehensive:We treat all memory uniformly, in-\ncluding the stack, the heap (which can optionally be\ngarbage-collected), and “growable” regions.\n•Scalable:The system supports separate compilation,\nas all analyses are intraprocedural.\nFollowing the seminal work of Tofte and Talpin [28], the\nsystem isregion-based: each object lives in one region and,\nwith the exception that a distinguished heap region may be\ngarbage collected, a region’s objects are all deallocated si-\nmultaneously. As a static system for an explicitly typed,\nlow-level language, Cyclone’s region framework makes sev-\neral technical contributions over previous work, notably:\n•Region subtyping:A last-in-first-out discipline on re-\ngion lifetimes induces an “outlives” relationship on re-\ngions, which, in turn, allows us to provide a useful\nsubtyping discipline on pointer types.\n•Simple effects:We eliminate the need for effect vari-\nables (which complicate interfaces) through the use of\na“regions_of” type operator.\n•Default annotations:We combine a local inference al-\ngorithm with a system of defaults to reduce the need\nfor explicit region annotations.\n•Integration of existential types:The combination of\nregion subtyping and simple effects makes the integra-\ntion of first-class abstract data types relatively simple.\nWe have found Cyclone’s region system sufficiently ex-\npressive for porting legacy C code and writing new applica-\ntions. In our experience, porting C code has required alter-\ning about 8% of the code, and the vast majority of changes\nhave not been region annotations. Furthermore, Cyclone\nperformed as well as C for the network applications we con-\nsidered, and within a factor of three for more computation-\nally intense programs.\nIn this paper, we demonstrate our contributions, begin-\nning with a general description of the system suitable for\nprogrammers (Section 2). We then present a more techni-\ncal discussion of our novel effect system and its interaction\nwith existential types (Section 3). We continue with a core\nformal language that we have proven sound (Section 4), an\noverview of our implementation (Section 5), and a study of\nthe burden of porting C code to Cyclone and the resulting\nperformance (Section 6). We discuss related work in Sec-\ntion 7 and future work in Section 8.\n2.USING CYCLONE REGIONS\nThis section presents the programmer’s view of Cyclone’s\nmemory-management system. It starts with the constructs\nfor creating regions, allocating objects, and so on — this\npart is simple because the departure from C is small. We\nnext present the corresponding type system, which is more\ninvolved because every pointer type carries a region annota-\ntion. Then we show how regions’ lifetimes induce subtyping\non pointer types. At that point, the type syntax is quite ver-\nbose, so we explain the features that, in practice, eliminate\nalmost all region annotations. Throughout, we take the lib-\nerty of using prettier syntax (e.g., Greek letters) than actual\nCyclone. For the ASCII syntax and a less region-oriented\nintroduction to Cyclone, see the user’s manual [6].\n2.1 Basic Operations\nIn Cyclone, all memory is in some region, of which there\nare three kinds:\n•A single heap region, which conceptually lives forever\n•Stack regions, which correspond to local-declaration\nblocks, as in C\n•Dynamic regions, which have lexically scoped lifetimes\nbut permit unlimited allocation into them\nStatic data objects reside in the heap. Primitivesmalloc\nandnewcreate new heap objects. Thenewoperation is\nlikemallocexcept that it takes an expression and initial-\nizes the memory with it. There is no explicit mechanism\nfor reclaiming heap-allocated objects (e.g.,free). However,\nCyclone programs may optionally link against the Boehm-\nDemers-Weiser conservative garbage collector [4] to reclaim\nunreachable heap-allocated objects implicitly. The interac-\ntion of the collector with regions is discussed in Section 5.\nStack regions correspond directly to C’s local-declaration\nblocks: entering a block with local declarations creates stor-\nage with a lifetime corresponding to the lexical scope of the\nblock. Function parameters are in a stack region correspond-\ning to the function’s lifetime. In short, Cyclone local dec-\nlarations and function parameters have exactly the same\nlayout and lifetime as in C.\nDynamic regions are created with the constructregion\nr{s},whereris an identifier andsis a statement. The\nregion’s lifetime is the execution ofs.Ins,ris bound to\naregionhandle, which primitivesrmallocandrnewuse to\nallocate objects into the associated region. For example,\nrnew(r) 3returns a pointer to anintallocated in the re-\ngion of handlerand initialized to 3. Handles are first-class\nvalues; a caller may pass a handle to a function to allow it\nto allocate into the associated region. A predefined constant\nheap_regionis a handle for the heap.\nLike a declaration block, a dynamic region is deallocated\nprecisely when execution leaves the body of the enclosed\nstatement. Execution can leave due to unstructured jumps\n(continue,goto,etc.),areturn, or via an exception. Sec-\ntion 5 explains how we compile dynamic-region deallocation.\nThe region system imposes no changes on the represen-\ntation of pointers or the meaning of operators such as&\nand*. There are no hidden fields or reference counts for\nmaintaining region information at run-time. Pointers to ar-\nrays of unknown size (denotedτ?) are implemented with\nextra fields to support bounds-checks, but this design is or-\nthogonal to regions. All the infrastructure for preventing\ndangling-pointer dereferences is in the static type system,\nmaking such dereferences a compile-time error.\n2.2 Basic Type System\nRegion Annotations.All pointers point into exactly one\nregion. In principle, pointer types are annotated with the\nregion nameof the region they point into, though in practice\nwe eliminate most annotations. Ignoring subtyping,int*ρ\ndescribes a pointer to anintthat is in the region whose\n\n283\n\nchar?ρstrcpy<ρ, ρ\n2\n>(char?ρd, const char?ρ\n2\ns);\nchar?ρ\nH\nstrdup<ρ>(const char?ρs);\nchar?ρrstrdup<ρ, ρ\n2\n>(region_t<ρ>,const char?ρ\n2\ns);\nsize_t strlen<ρ>(const char?ρs);\nFigure 1: Cyclone string library prototypes\nname isρ. The invariant that pointers have a particular\nregion is the basic restriction we impose to make the unde-\ncidable problem of detecting dangling-pointer dereferences\ntractable. Pointer types with different region names are dif-\nferent types. A handle for a region corresponding toρhas\nthe typeregion_t<ρ>.\nRegion names fall into four categories. The region name\nfor the heap isρ\nH\n. A block labeledL(e.g.,L:{int x=0;s})\nhas nameρ\nL\nand refers to the stack region that the block\ncreates. Similarly, the arguments of a functionfare stored\nin the stack regionρ\nf\n. Finally, the statementregion r {s}\ndefines region nameρ\nr\nfor the created region. Sorhas\ntyperegion_t<ρ\nr\n>. In all cases, the scope of a region name\ncorresponds to the lifetime of the corresponding region.\nWe can now give types to some small examples. Ife\n1\nhas\ntyperegion_t<ρ>ande\n2\nhas typeτ,thenrnew (e\n1\n)e\n2\nhas\ntypeτ*ρ.Ifint xis declared in blockL,then&xhas type\nint*ρ\nL\n. Similarly, ifehas typeτ*ρ,then&*ehas typeτ*ρ.\nPreventing dangling-pointer dereferences.To derefer-\nence a pointer, safety demands that its region be live. Our\ngoal is to determine at compile-time that no code follows\na dangling pointer. It often suffices to ensure that pointer\ntypes’ region names are in scope. For example, this code is\nill-typed:\n1. int*ρ\nL\np;\n2. L:{ int x = 0;\n3. p = &x;\n4. }\n5. *p = 42;\nThe code creates storage forxat line 2 and deallocates it at\nline 4, so the assignment of&xtopcreates a dangling pointer\nthat is dereferenced in line 5. Cyclone rejects this code be-\ncauseρ\nL\nis not in scope whenpis declared. If we change\nthe declaration ofpto another region, then the assignment\np=&xfails to type-check because&xhas typeint*ρ\nL\n.\nHowever, Cyclone’s advanced features, notably existential\nand universal polymorphism, conspire to allow pointers to\nescape the scope of their regions, just as closures allow point-\ners to escape in the original Tofte-Talpin work. Therefore,\nin general, we cannot rely on simple scoping mechanisms to\nensure soundness. Instead, we must track the set of live re-\ngion names at each control-flow point. To keep the analysis\nintraprocedural, we use a novel type-and-effects system to\ntrack interprocedural liveness requirements. We delay the\nfull discussion of effects until Section 3.\nRegion Polymorphism.Functions in Cyclone areregion-\npolymorphic; they can abstract the actual regions of their\narguments or results. That way, functions can manipulate\npointers regardless of whether they point into the stack, the\nheap, or a dynamic region.\nFigure 1 presents some prototypes from the Cyclone string\nlibrary, includingstrcpy,strdup,andstrlen, and a region-\nallocating functionrstrdup.The?is Cyclone notation for\na pointer to a dynamically sized array. These functions all\nexhibit region polymorphism. Instrcpy, the parameters’\nregion namesρandρ\n2\nare abstracted by the syntax<ρ, ρ\n2\n>,\nmeaning they can be instantiated with any actual region\nname when the function is called. So we can write code like:\nL:{ char buf[20];\nstrcpy<ρ\nL\n,ρ\nH\n>(buf,\"a heap pointer\"); }\nHere, the syntax<ρ\nL\n,ρ\nH\n>in the call instantiatesρ\n2\nwith\nthe heap regionρ\nH\nandρwith the stack regionρ\nL\n, allowing\none to copy a string from the heap to the stack.\nRegion polymorphism can guarantee region equalities of\nunknown regions by using the same region names. For ex-\nample, instrcpythe region names of the first argument and\nthe return value are the same, so the returned pointer must\npoint to the same region as the first argument. Region-name\nequalities are also important for dynamic regions. For exam-\nple, therstrdupfunction is a version ofstrdupthat copies\nthe source string into a dynamic region. In its prototype,\ntheregionnameofthereturnedvalueρmatches the region\nname of the dynamic region handleregion_t<ρ>.Infact,\nwe implementstrdupby just callingrstrdup:\nchar?ρ\nH\nstrdup<ρ>(const char?ρs) {\nreturn rstrdup<ρ\nH\n,ρ>(heap_region,s);\n}\nPolymorphic Recursion.It is often valuable to instanti-\nate the region parameters of a recursive function call with\ndifferent names than the function’s own region arguments.\nAs an example, this contrived program has a functionfact\nthat abstracts a regionρand takes as arguments a pointer\nintoρand an integer.\nvoid fact<ρ>(int*ρresult, int n) {\nL: { int x = 1;\nif(n > 1) fact<ρ\nL\n>(&x,n-1);\n*result = x*n; }\n}\nint g = 0;\nint main() { fact<ρ\nH\n>(&g,6); return g; }\nWhen executed, the program returns the value 720. In\nmain,wepassfacta heap pointer (&g), so the type offact\nis instantiated withρ\nH\nforρ. In contrast, the recursive call\ninstantiatesρwithρ\nL\n, which is the name of the stack region.\nAt run time, the first call tofactmodifiesg;eachrecursive\ncall modifies the value ofxin its caller’s stack frame.\nType Definitions.Becausestructdefinitions can contain\npointers, Cyclone allows these definitions to be parameter-\nized by region names. For example, here is a declaration for\nlists of pointers to ints:\nstruct Lst<ρ\n1\n,ρ\n2\n>{\nint*ρ\n1\nhd;\nstruct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\ntl;\n};\nIgnoring subtyping, a value of typestruct Lst<ρ\n1\n,ρ\n2\n>\nis a list withhdfields that point intoρ\n1\nandtlfields that\npoint intoρ\n2\n. Other invariants are possible: If the type\noftlwerestruct Lst<ρ\n2\n,ρ\n1\n>*ρ\n2\n, the declaration would\n\n284\n\nchar?ρstrcpy(char?ρd, const char? s);\nchar? strdup(const char? s);\nchar?ρrstrdup(region_t<ρ>,const char? s);\nsize_t strlen(const char? s);\nFigure 2: Cyclone prototypes minimally-annotated\ndescribe lists where the regions forhdandtlalternated at\neach element.\nType abbreviations usingtypedefcan also have region\nparameters. For example, we can define region-allocated\nlists of heap-allocated pointers with:\ntypedef struct Lst<ρ\nH\n,ρ>*ρlist_t<ρ>;\n2.3 Subtyping\nAlthough the type system we have described thus far is\nquite powerful, it is not expressive enough in some cases.\nFor example, it is common to define a local variable to al-\nternatively hold the value of one of its arguments:\nvoid f<ρ\n1\n,ρ\n2\n>(int b, int*ρ\n1\np1, int*ρ\n2\np2) {\nL: { int*ρ\nL\np;\nif(b) p = p1; else p=p2;\n/* ...do something with p... */ }\n}\nIt appears that the program should fail to type-check be-\ncause neitherp1norp2has typeint*ρ\nL\n. If we change the\ntype ofptoint*ρ\n1\norint*ρ\n2\n, then one of the assignments\nis illegal.\nTo solve this problem, we observe that if the region cor-\nresponding toρ\n1\noutlivesthe region corresponding toρ\n2\n,\nthen it is sound to use a value of typeτ*ρ\n1\nwhereweex-\npect one of typeτ*ρ\n2\n. Cyclone supports such coercions\nimplicitly. The last-in-first-out region discipline makes such\noutlives relationships common: when we create a region, we\nknow every region currently alive will outlive it. Simple sub-\ntyping based on this outlives relationship allows the above\nprogram to type-check.\nRegion-polymorphic functions can specify outlives rela-\ntionships among their arguments with explicit preconditions\nthat express partial orders on region lifetimes. In practice,\nwe have very rarely used this feature, because the local out-\nlives information has sufficed.\nTo ensure soundness, we do not allow castingτ\n1\n*ρtoτ\n2\n*ρ,\neven ifτ\n1\nis a subtype ofτ\n2\n, as this cast would allow putting\naτ\n2\nin a location where other code expects aτ\n1\n.(Thisprob-\nlem is the usual one with covariant subtyping on references.)\nHowever, Cyclone does allow casts fromτ\n1\n*ρtoconstτ\n2\n*ρ\n2\nwhenτ\n1\nis a subtype ofτ\n2\n. To ensure soundness, we must\nenforce read-only access forconstvalues (unlike C). This\nsupport for “deep” subtyping, when combined with poly-\nmorphic recursion, is powerful enough to allow stack alloca-\ntion of some recursive structures of arbitrary size.\n2.4 Eliminating Annotations\nAlthough Cyclone is explicitly typed in principle, we use a\ncombination of inference and well-chosen defaults to reduce\ndramatically the number of annotations needed in practice.\nWe emphasize that our approach to inference is purely in-\ntraprocedural and that prototypes for functions are never\ninferred. Rather, we use a default completion of partial\nprototypes to minimize region annotations. This approach\npermits separate compilation.\nWhen writing a pointer type (e.g.,int*), the region an-\nnotation is always optional; the compiler deduces an appro-\npriate annotation based on context:\n1. For local declarations, a unification-based inference en-\ngine infers the annotation from the declaration’s (in-\ntraprocedural) uses. This local inference works well in\npractice, especially when declarations have initializers.\n2. Omitted region names in argument types are filled in\nwith fresh region names that are generalized implic-\nitly. So by default, functions are region polymorphic\nwithout any region equalities.\n3. In all other contexts (return types, globals, type defini-\ntions), omitted region names are filled in withρ\nH\n(i.e.,\nthe heap). This default works well for global variables\nand for functions that return heap-allocated results.\nHowever, it fails for functions likestrcpythat return\none of their parameters. Without looking at the func-\ntion body, we cannot determine which parameter (or\ncomponent of a parameter) the function might return.\nIn addition, when calling a region-polymorphic function,\nthe programmer can omit the explicit region-name instan-\ntiation and the inference engine discovers it. As a result of\nthese devices, ourfactexample can become annotation-free:\nvoid fact(int* result, int n) {\nint x = 1;\nif(n > 1) fact(&x,n-1);\n*result = x*n;\n}\nPut another way, the function above, when treated as C\ncode, ports to Cyclone with no modification. Figure 2 shows\nthe same string-library functions as Figure 1, but minimally\nannotated. In all cases, the lack of a region annotation on\nthe argumentsmeans the type-checker would insert a fresh\nregion name for the pointer type, and generalize it. The\nlack of an annotation on the return type ofstrdupdefaults\nto the heap. In total, five region annotations were removed\nand all generalization became implicit.\nWhile the default annotations and inference engine reduce\nthe burden on the programmer and make porting easier, it is\nstill necessary to put in some explicit annotations to express\nequalities necessary for safety. For example, if we write:\nvoid f2(int** pp, int* p) {*pp=p;}\nthen the code elaborates to:\nvoid f2<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int *ρ\n1\n*ρ\n2\npp, int *ρ\n3\np) {*pp=p;}\nwhich fails to type-check becauseint*ρ\n1\n\u0001=int*ρ\n3\n.The\nprogrammer must insert an explicit region annotation to\nassert an appropriate equality relation on the parameters:\nvoid f2(int*ρ* pp, int*ρp){*pp=p;}\nFinally, we employ another technique that greatly reduces\nannotations in practice, with regard to type definitions. We\ncan partially apply parameterized type definitions; elided\narguments are filled in via the same rules used for pointer\ntypes. Here is an aggressive use of this feature:\n\n285\n\ntypedef struct Lst<ρ\n1\n,ρ\n2\n>*ρ\n2\nl_t<ρ\n1\n,ρ\n2\n>;\nl_t heap_copy(l_t l) {\nl_t ans = NULL;\nfor(l_t l2 = l; l2 != NULL; l2 = l2->tl)\nans = new Lst(new *l2->hd,ans);\nreturn ans;\n}\nBecause of defaults, the parameter type isl_t<ρ\n1\n,ρ\n2\n>and\nthe return type isl_t<ρ\nH\n,ρ\nH\n>. Because of inference, the\ncompiler givesansthe typel_t<ρ\nH\n,ρ\nH\n>(thereturnstate-\nment requiresansto have the function’s return type) and\nl2the typel_t<ρ\n1\n,ρ\n2\n>(l2’s initializer (l) has this type).\n3.EFFECTS\nWe argued in Section 2.2 that the scope restrictions on re-\ngion names prevent pointers from escaping the scope of their\nregion. In particular, a function or block cannot return or\nassign a value of typeτ*ρoutside the scope ofρ’s definition,\nsimply because you cannot write down a (well-formed) type\nfor the result. Indeed, if Cyclone had no mechanisms for\ntype abstraction, this property would hold.\nBut if there is some way to hide a pointer’s type in a result,\nthen the pointer could escape the scope of its region. For\ninstance, if Cyclone had (upwards-escaping) closures, then\none could hide a pointer to a local variable in the closure’s\nenvironment, and return the closure outside the scope of\nthe variable, thereby introducing a dangling pointer. This,\nin and of itself, is not a problem, but if the closure is later in-\nvoked, then it might dereference the dangling pointer. This\nis the critical problem that Tofte and Talpin address for\nfunctional languages.\nCyclone does not have closures, but it has other typing\nconstructs that hide regions. In particular, Cyclone provides\nexistential types [22, 14], which suffice to encode closures [21]\nand simple forms of objects [5]. Therefore, it is possible in\nCyclone for pointers to escape the scope of their regions.\nTo address this problem, the Cyclone type system keeps\ntrack of the subset of region names that are considered live\nat each control-flow point. Following Walker, Crary, and\nMorrisett [29], we call the set of live regions thecapability.\nTo allow dereferencing a pointer, the type system ensures\nthat the associated region name is in the capability. Simi-\nlarly, to allow a function call, Cyclone ensures that regions\nthe function might access are all live. To this end, func-\ntion types carry aneffectthat records the set of regions\nthe function might access. The idea of using effects to en-\nsure soundness is due to Tofte and Talpin (hereafter TT).\nHowever, our treatment of effects differs substantially from\nprevious work.\nThe first major departure from TT is that we calculate\ndefault effects from the function prototype alone (instead of\ninferring them from the function body) in order to preserve\nseparate compilation. The default effect includes the set of\nregion names that appear in the argument or result types.\nFor instance, given the prototype:\nint*ρ\n1\nf(int*, int*ρ\n1\n*);\nwhich elaborates to:\nint*ρ\n1\nf<ρ\n1\n,ρ\n2\n,ρ\n3\n>(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n);\nthe default effect is{ρ\n1\n,ρ\n2\n,ρ\n3\n}. In the absence of poly-\nmorphism, this default effect is a conservative bound on the\nregions the function might access. As with region names in\nprototypes, the programmer can override the default with\nan explicit effect. For example, iffnever dereferences its\nfirst argument, we can strengthen its prototype by adding\nan explicit effect as follows:\nint*ρ\n1\nf(int*ρ\n2\n, int*ρ\n1\n*ρ\n3\n;{ρ\n1\n,ρ\n3\n});\nIn practice, we have found default effects extremely useful.\nIndeed, for the 110,000 lines of Cyclone code we have thus\nfar, we have written one non-default effect.\nThe second major departure from TT is that we do not\nhaveeffect variables. Effect variables are used by TT for\nthree purposes: (1) to simulate subtyping in a unification-\nbased inference framework, (2) to abstract the set of regions\nthat a closure might need to access, and (3) to abstract the\nset of regions hidden by an abstract type.\nIn our original Cyclone design, we tried to use TT-style\neffect variables. However, we found that the approach does\nnot work well in an explicitly typed language for two rea-\nsons. First, the effect variables introduced by TT to support\neffect subtyping could occur free in only one location, and all\neffect variables had to be prenex quantified [26]. Their uni-\nfication algorithm depended crucially upon these structural\ninvariants. In an explicitly typed language, we found that\nenforcing these constraints was difficult. Furthermore, the\nprenex quantification restriction prevented first-class poly-\nmorphic functions, which Cyclone supports.\nSecond, we needed effect variables in some library inter-\nfaces, making the libraries harder to understand and use.\nConsider, for instance, a type for polymorphic sets:\nstruct Set<α, ρ, \u0004>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α;\u0004);\n}\nASetconsists of a list ofαelements, with the spine of the\nlist in regionρ. We do not know where the elements are\nallocated until we instantiateα. The comparison function\ncmpis used to determine set membership. Because the type\nof the elements is not yet known, the type of thecmpfunction\nmust use an effect variable\u0004to abstract the set of regions\nthat it might access when comparing the twoαvalues. And\nthis effect variable, like the type and region variable, must\nbe abstracted by theSetstructure.\nSuppose the library exports theSetstructure to clients\nabstractly (i.e., without revealing its definition):\nstruct Set<α, ρ, \u0004>;\nThe client must somehow discern the connection betweenα\nand\u0004,namelythat\u0004ismeanttoabstractthesetofregions\nwithinαthat the hidden comparison function might access.\n3.1 Avoiding Effect Variables\nTo simplify the system while retaining the benefit of effect\nvariables, we use a type operator,regions_of(τ).This\nnovel operator is just part of the type system; it does not\nexistatruntime. Intuitively,regions_of(τ)represents the\nset of regions that occur free inτ.Inparticular:\nregions_of(int)=∅\nregions_of(τ*ρ)={ρ}∪regions_of(τ)\nregions_of((τ\n1\n,...,τ\nn\n)→τ)=\nregions_of(τ\n1\n)∪···∪regions_of(τ\nn\n)∪regions_of(τ)\n\n286\n\nFor typ e variables,regions_of(α) is treated as an abstract\nset of region variables, much like effect variables. For ex-\nample,regions_of(α*ρ)={ρ}∪regions_of(α).The\ndefault effect of a function that hasαin its type simply\nincludesregions_of(α).\nWith the addition ofregions_of,wecanrewritetheSet\nexample as follows:\nstruct Set<α, ρ>{\nlist_t<α,ρ> elts;\nint (*cmp)(α,α; regions_of(α));\n}\nNow the connection between the type parameterαand the\ncomparison function’s effect is apparent, and the data struc-\nture no longer needs to be parameterized by an effect vari-\nable. Moreover,regions_of(α)is the default effect forint\n(*cmp)(α,α), so we need not write it.\nNow suppose we wish to build aSetvalue\nusing a particular comparison function:\nint cmp_ptr<ρ\n1\n>(int*ρ\n1\np1, int*ρ\n1\np2) {\nreturn (*p1) == (*p2);\n}\nSet build_set(list_te){\nreturn Set{.elts = e, .cmp = cmp_ptr<ρ\n1\n>};\n}\nThe default effect forcmp_ptris{ρ\n1\n}. After instantiatingα\nwithint*ρ\n1\n, the effect ofcmpbecomesregions_of(int*ρ\n1\n),\nwhich equals{ρ\n1\n}. As a result, the functionbuild_settype-\nchecks. In fact, using any function with a default effect will\nalways succeed. Consequently, programmers need not ex-\nplicitly mention effects when designing or using libraries.\nIn addition, unifying function types becomes somewhat\neasier with default effects because, given the same argument\nand result types, two functions have the same default effect.\n3.2 Interaction with Existential Types\nAs mentioned above, Cyclone supportsexistential types,\nwhich allow programmers to encode closures. For example,\nwe can give a type for “call-backs” that return anint:\nstruct IntFn∃α{ int (*func)(αenv);αenv;};\nHere, the call-back consists of a function pointer and some\nabstracted state that should be passed to the function. The\nαis existentially bound: Various objects of typestruct\nIntFncan instantiateαdifferently. When astruct IntFn\nobject is created, the type-checker ensures there is a type\nforαsuch that the fields are initialized correctly.\nTo access the fields of an existential object, we need to\n“open” them by giving a name to the bound type variable.\nFor example, we can write (in admittedly alien syntax):\nint apply_intfn(struct IntFn pkg) {\nlet IntFn{<β> .func = f,.env = y} = pkg;\nreturn f(y);\n}\nTheletform bindsftopkg.funcwith typeint (*)(β)\nandytopkg.envwith typeβ. So the function call appears\nwell-typed. However, the effect forfisregions_of(β)and\nwe have no evidence that these regions are still live, even\nthoughβis in scope. Indeed, the regions may not be live as\nthe following code demonstrates:\nint read<ρ>(int*ρx) { return *x; }\nstruct IntFn dangle() {\nL:{int x = 0;\nstruct IntFn ans =\n{ .func = read<ρ\nL\n>, .env = &x};\nreturn ans; }\n}\nHere, the abstracted typeαis instantiated withint*ρ\nL\nbe-\ncause the call-back’s environment is a pointer to anintin\nregionρ\nL\n. The function for the call-back just dereferences\nthe pointer it is passed. When packaged as an existential,\ntheint*ρ\nL\nis hidden and thus the result is well-typed de-\nspite the fact that the call-back has a dangling pointer.\nIn short, to usestruct IntFnobjects, we must “leak”\nenough information to prove a call is safe. Rather than re-\nsorting to effect variables, we giveregions_of(α)abound:\nstruct IntFn<ρ>∃α:>ρ{ ... };\nThe bound meansregions_of(α)must alloutliveρ;the\ntype-checker rejects an instantiation ofαin which the bound\nmay not hold. Therefore, ifpkghas typestruct IntFn<ρ>,\nthen we can callfso long asρis live. In practice, bounds\nreduce the “effect” of a call-back to a single region.\n4. FORMAL SOUNDNESS\nIn a separate technical report [15], we have defined an\noperational model of Core Cyclone, formalized the type sys-\ntem, and proven type soundness. Space constraints prevent\nus from including the material here, so we summarize the\nsalient details.\nCore Cyclone includes all of the features relevant to mem-\nory management, including stack allocation, dynamic re-\ngions, polymorphism, and existential types. The operational\nsemantics is a small-step, deterministic rewriting relation\n(→) from machine states to machine states. A machine\nstate is a triple (G, S, s) consisting of a garbage stackG,\nastackS, and a statements. The stacks are lists mapping\nregion names (ρ)toregions(R),whichinturnaremaps\nfrom locations (x)tovalues(v). The garbage stackGis\na technical device to record the deallocated storage so that\nthe program stays closed despite dangling pointers. Note,\nhowever, that the abstract machine becomes stuck if the\nprogram attempts to read or write a location in the garbage\nstack. The primary goal of the formalism is to prove that\nwell-typed programs cannot get stuck, so the garbage stack\n(the deallocated regions) need not exist during execution.\n4.1 Syntax\nFigure 3 gives BNF definitions for the syntax of the state-\nments, expressions, and types for Core Cyclone. Construc-\ntors (τ) define syntax for both types and regions. We use a\nkind discipline to determine whether a type variable repre-\nsents a type (T) or a region (R).\nTypes include pairs (τ\n1\n×τ\n2\n) to model structs. Like structs,\npairs are passed by value (i.e., copied). We do not dupli-\ncate polymorphic code, so pair types cannot instantiate type\nvariables because their values are larger than those of other\ntypes (i.e., they are at least two words). Types also include\ntype variables, universal types, and existential types. The\nquantifiers can range over types or regions and include re-\ngion constraints, which are used to specify partial orders on\nregion lifetimes. A region constraint (γ)isalistofprimitive\n\n287\n\nkindsκ::=T|R\ntypeandregionvarsα, ρ\nregion sets\u0004::=α\n1\n∪···∪α\nn\n∪{ρ\n1\n,...,ρ\nm\n}\nregion constraintsγ::=∅|γ, \u0004 <:ρ\nconstructorsτ::=α|int|τ\n1\n\u0001\n→τ\n2\n|τ\n1\n×τ\n2\n|τ∗ρ|handle(ρ)|∀α:κ\bγ.τ|∃α:κ\bγ.τ\nexpressionse::=x\nρ\n|v|e\bτ\t|(e\n1\n,e\n2\n)|e.i|∗e|rnew(e\n1\n)e\n2\n|\ne\n1\n(e\n2\n)|&e|e\n1\n=e\n2\n|pack[τ\n1\n,e]asτ\n2\nvaluesv::=i|f|&p|region(ρ)|(v\n1\n,v\n2\n)|pack[τ\n1\n,v]asτ\n2\npathsp::=x\nρ\n|p.i\nfunctionsf::=ρ:(τ\n1\nx\nρ\n)\n\u0001\n→τ\n2\n={s}|Λα:κ\bγ.f\nstatementss::=e|returne|s\n1\n;s\n2\n|if(e)s\n1\nelses\n2\n|while(e)s|\nρ:{τx\nρ\n=e;s}|region\bρ\tx\nρ\ns|ρ:{open[α, x\nρ\n]=e;s}|spop[ρ]\nFigure 3: Abstract Syntax of Core Cyclone\nconstraints of the form\u0004<:ρwhere\u0004is a region set, and\nρis a region. Intuitively, the constraint means that ifρis\nlive, then any of the regions in\u0004are live. Region sets can in-\nclude region variables (ρ)ortheregions_ofatypevariable.\n(We omit theregions_offor conciseness.) Finally, function\ntypes include a region set (\u0004), which specifies the function’s\neffect (i.e., the set of regions that must be live before calling\nthe function).\nStatements consist of expressions, return statements, com-\nposition, if statements, and while statements. In addition,\nthey include blocks (ρ:{τx\nρ\n=e;s}) for declaring a new\nstack region and a variable within that region, dynamic-\nregion declarations (region\bρ\tx\nρ\ns), and a form for opening\nvalues of existential type. Finally, statements include a spe-\ncial form “spop[ρ]” that, when executed, evaluatessto a\nterminal state and then deallocates (moves to the garbage\nstack) the regionρ. This form is not available to source\nprograms; it is used internally by the abstract machine as a\nmarker to indicate when to deallocate a region.\nExpressions include variablesx\nρ\n, which double as loca-\ntions. Each variablexlives in a given regionρ; formally\nx\nρ\nmakes this fact explicit. Other expressions are integers,\nfunctions, pointer dereference, function calls, the address-of\noperator, and assignment as in C. In addition, expressions\ninclude type instantiation, pairs, projection,rnew,andex-\nistential packages. Lastly, region handles (region(ρ)) are\na special form not available to source programs; creating a\ndynamic region withregion\bρ\tx\nρ\nsbindsx\nρ\ntoregion(ρ).\nRather than model individual memory locations, paths\nprovideasymbolicwaytorefertoacomponentofacom-\npound object. For instance, if the locationx\nρ\ncontains the\nvalue ((3,4),(5,6)), then the pathx\nρ\n.1 refers to (3,4), and\nx\nρ\n.1.2 refers to 4. As in C, ifpis a path, then &pis a value.\n4.2 Static Semantics\nThe most important typing judgment is the one for state-\nments. It has the form:\n∆; Γ;γ;\u0004;τ\n\nstmt\ns\nHere, ∆ records the type and region variables that are in\nscope, Γ records the value variables in scope and their types,\nγrecords partial-order constraints relating region lifetimes,\n\u0004records the capability (i.e., which regions in ∆ are con-\nsidered live), andτrecords the type thatemust have in\nany statement of the formreturne. We present just a few\ninteresting rules.\nType-checking statements requires checking that expres-\nsions have the correct types. For example, the rule for return\nstatements is:\n∆; Γ;γ;\u0004\ne:τ\n∆; Γ;γ;\u0004;τ\n\nstmt\nreturne\nExpressions must access only memory that can be proven\nlive from\u0004andγ. Here are two example rules:\nγ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\nx\nρ\n:Γ(x\nρ\n)\n∆; Γ;γ;\u0004\ne:τ∗ργ\n\u0004⇒ρ\n∆; Γ;γ;\u0004\n∗e:τ\nWe useγ\n\u0004⇒ρto proveρis live. Informally, we need a\nρ\n\u0002\n∈\u0004such that the partial orderγshowsρoutlivesρ\n\u0002\n.Of\ncourse,ρ∈\u0004suffices.\nWe use the same idea for our subsumption rule:\n∆; Γ;γ;\u0004\ne:τ∗ρ\n1\nγ\nρ\n2\n⇒ρ\n1\n∆; Γ;γ;\u0004\ne:τ∗ρ\n2\nTo type-check function calls, we useγ\n\u0004⇒\u0004\n1\nto mean\neveryαandρin\u0004\n1\ncanbeprovenlivefrom\u0004andγ.The\nrule is otherwise standard:\n∆; Γ;γ;\u0004\ne\n1\n:τ\n2\n\u0001\n1\n→τ∆; Γ;γ;\u0004\ne\n2\n:τ\n2\nγ\n\u0004⇒\u0004\n1\n∆; Γ;γ;\u0004\ne\n1\n(e\n2\n):τ\nHere is the rule for type instantiation:\n∆; Γ;γ;\u0004\ne:∀α:κ\bγ\n1\n.τ\n2\n∆\nτ\n1\n:κγ\nγ\n1\n[τ\n1\n/α]\n∆; Γ;γ;\u0004\ne\bτ\n1\n\t:τ\n2\n[τ\n1\n/α]\nThe only novelty is ensuring thatγestablishes the con-\nstraintsγ\n1\nused when type-checkinge. The judgmentγ\nγ\n\u0002\njust means for every\u0004<:ρinγ\n\u0002\n,wecanshowγ\nρ⇒\u0004.By\nabuse of notation, we writeτ\n2\n[τ\n1\n/α] for the capture-avoiding\nsubstitution ofτ\n1\nforαinτ\n2\nandγ\n1\n[τ\n1\n/α] for the substitu-\ntion ofregions\nof(τ\n1\n)forαinγ\n1\n.\nAnother necessary judgment for statements is\n\n\nret\ns\nIt ensures that if execution ofsterminates, then the ter-\nminal state will have the formreturnvfor some valuev.\nThis judgment, defined via a simple syntax-directed analy-\nsis, enforces that functions must not “fall off” — they always\nreturn values.\nTo set up the proof of soundness, we define a judgment to\nassert that a garbage stackGand stackScan be described\n\n288\n\nby the context ∆; Γ;γ:\n\n\nheap\n(G, S) : ∆; Γ;γ\nHere, ∆ is the set of region names that are bound in either\nGorS; Γ records the types of the locations bound in either\nGorS;andγrecords the regions’ relative lifetimes. In par-\nticular,γdescribes the total order of the regions inS.This\njudgment is used to connect assumptions that a statement\nmight make with the reality of the current heap.\nWith these judgments, we can state the Soundness Theo-\nrem for Core Cyclone:\nTheorem 4.1 (Soundness).If:\n1.\n\nheap\n(∅,[ρ\nH\n\r→R]) : ∆; Γ;γ,\n2.\n\nret\ns,\n3.∆; Γ;γ;{ρ\nH\n};int\n\nstmt\ns,and\n4.scontains nopopstatements\nthen either(G, S, s)runs forever or there exists aG\n\u0002\n,R\n\u0002\nand\nisuch that(G,[ρ\nH\n\r→R],s)→\n∗\n(G\n\u0002\n,[ρ\nH\n\r→R\n\u0002\n],returni).\nIn plain English, if we start with an empty garbage heap,\nand a stack that contains a single heap region ([ρ\nH\n\r→R])\nthat is well-formed, and if statements“doesn’t fall off,”\nandsis well-formed with respect to the type of the initial\nheap and returns only integers, andsdoes not containpop\nstatements, then the program cannot get stuck from type\nerrors or dangling-pointer dereferences. Furthermore, if the\nprogram terminates, all of the regions it allocated will have\nbeen freed and the program will return an integer.\nThe soundness proof, available in our companion techni-\ncal report [15], uses long and tedious progress and preserva-\ntion (subject-reduction) lemmas. Here we just sketch two\ncomplications from the proof of preservation. First, our\noperational semantics uses type substitution, for example\n(G, S,(Λα:κ\bγ.f)\bτ\t)→(G, S, f[τ/α]). As usual, we need\na substitution lemma in order to conclude the well-typedness\noff[τ/α] given the well-typedness of Λα:κ\bγ.f.Because\nof explicit effects and partial orders, proving the necessary\nsubstitution lemma requires several auxiliary lemmas, for\nexampleγ\n\u0004\n1\n⇒\u0004\n2\nimpliesγ[\u0004\n3\n/α]\n\u0004\n1\n[\u0004\n3\n/α]⇒\u0004\n2\n[\u0004\n3\n/α].\nSecond, we must weaken the theorem’s assumptions that\nthe heap has one region andshas nopopstatements, while\nstill proving that the program properly deallocates all the\nregions it allocates. To do so, we assume that given (G, S, s),\nwe can partitionSintoS\n1\nS\n2\nsuch thatsdeallocates all re-\ngions inS\n2\n(in last-in-first-out order) and none of the regions\ninS\n1\n. (To see this assumption is a proper weakening, let\nS\n1\n=[ρ\nH\n\r→R]andS\n2\n=∅.) This assumption (formalized\nas another judgment on statements) implies enough about\nthe position ofpopstatements insto prove that the pro-\ngrams\n\u0002\nresulting from a rewriting step properly deallocates\nexactly all of the live regions not inS\n1\n. In other words, the\nability to partitionSsuch that the necessary properties hold\nis preserved under evaluation.\n5.IMPLEMENTING CYCLONE REGIONS\nThe code-generation and run-time support for Cyclone\nregions is very simple. Heap and stack manipulation are\nexactly as in C. Dynamic regions are represented as linked\nlists of “pages” where each page is twice the size of the pre-\nvious one. A region handle points to the beginning of the list\nand the current “allocation point” on the last page, where\nrneworrmallocplace the next object. If there is insuffi-\ncient space for an object, a new page is allocated. Region\ndeallocation simply frees each page of the list.\nWhen the garbage collector is included, dynamic-region\nlist pages are acquired from the collector. The collector\nsupports explicit deallocation, which we use to free regions.\nIt is important to note that the collector simply treats the\nregion pages as large objects. As they are always reachable\nfrom the stack, they are scanned and any pointers to heap-\nallocated objects are found, ensuring that these objects are\npreserved. The advantage of this interface is its simplicity,\nbut at some cost: At collection time, every object in every\ndynamic region appears reachable, and thus all (live) dy-\nnamic regions must be scanned, and no objects within (or\nreachable from) dynamic regions are reclaimed.\nThe code generator ensures that regions are deallocated\neven when their lifetimes end due to unstructured control\nflow. For each intraprocedural jump orreturn,itiseasyto\ndetermine statically how many regions should be deallocated\nbefore transferring control.When throwing an exception,\nthe number of regions to deallocate is not known statically.\nTherefore, we store region handles and exception handlers in\nan integrated list that operates in a last-in-first-out manner.\nWhen an exception is thrown, we traverse the list deallocat-\ning regions until we reach an exception handler. We then\ntransfer control withlongjmp. In this fashion, we ensure\nthat a region is always deallocated when control returns.\n6. EXPERIMENTAL RESULTS\nTo simplify porting to and programming in Cyclone, we\nhave sought to minimize the number of required region an-\nnotations. Just as important, we have sought to achieve\ngood performance. In Sections 6.1 and 6.2, we analyze the\nburden of porting, in terms of added annotations, and find\nthat annotations impose negligible burden on the applica-\ntion writer, but a somewhat larger burden on the library\nwriter. In Section 6.3, we present a comparison of Cyclone’s\nperformance to that of C for our ported applications, and\nfind that while networking programs essentially perform the\nsame as C, compute-bound applications are up to a factor\nof three slower due to run-time checks and pointer represen-\ntations.\n6.1 Porting Application Code\nWe ported a number of applications and compared the\ndifferences in source code between the original and the Cy-\nclone version. We picked several networking applications\nbecause they are part of the “systems” domain in which\ncontrolling data representation is important. These include\na web server (mini_httpd), some web utilities (http_get,\nhttp_post,http_ping,andhttp_load), and a simple client\n(finger). We also used some computationally intense, older\nC applications that make heavy use of arrays and pointers;\nthese includecfrac,grobner,andtile. Finally, we ported\nthe compression utilitiescacmandncompress.\nWe took two approaches to porting. First, we changed\nall the programs as little as possible to make them correct\nCyclone programs. Then, forcfracandmini_httpd,we\nregionizedthe code: We made functions more region poly-\nmorphic and, where possible, eliminated heap allocation in\n\n289\n\nProgramLOCannotations\nCCycdiffstotallines\ncacm3403604100\ncfrac4218421513422\nfinger1581611733\ngrobner326034014527140\nhttpget5295304444\nhttpload207220581211513\nhttpping107210823311\nhttppost6076095188\nmatxmult57531131\nminihttpd3005302726644\nncompress19641986134109\ntile1345136514822\ntotal1862718847145212486\nregionized benchmarks\ncfrac42184192503158107\nminihttpd300529865318854\ntotal722371781034246161\nTable 1: Benchmark code differences\nfavor of dynamic region allocation withrnew. We also added\ncompiler-checked “not null” annotations to pointer types\nwhere possible to avoid some null checks.\nOur results are summarized in Table 1. For each pro-\ngram, Table 1 shows the number of lines of C and Cyclone\ncode, the number of differences between the two, and the\nregion annotations required in Cyclone. Thediffscolumn\nindicates the number of lines added or changed in porting\nfrom C to Cyclone. For the annotations, thetotalcolumn is\nthe number of individual region-related alterations, includ-\ning per-variable annotations and occurrences ofregion r\n{s}andrnew.Thelinescolumn is the total number of lines\nin the file that changed due to these annotations.\nThere are two interesting results regarding the difficulty of\nminimal porting. First, the overall changes in the programs\nare relatively small — less than 10% of the program code\nneeded to be changed. The vast majority of the differences\narise from pointer-syntax alterations. These changes are\ntypically easy to make — e.g., the type of strings are changed\nfromchar *tochar ?. We are currently experimenting\nwith interpretingchar *as a safe null-terminated string\ntype by default; doing so allows many fewer changes.\nThe most encouraging result is that the number of region\nannotations is small: only 124 changes (which account for\nroughly 6% of the total changes) in more than 18,000 lines of\ncode. The majority of these changes were completely triv-\nial, e.g., many programs required addingρ\nH\nannotations to\nargvso that arguments could be stored in global variables.\nThe program that required the most changes wasgrobner.\nInterestingly, the majority of these changes arose from the\nfact that in one place a stack pointer was being stored in a\nstructtype. We thereforeparameterized thestructdefini-\ntion with a region variable, and this parameterization then\npropagated through the rest of the code. However, the de-\nfault annotation still worked in many cases: out of 133 total\nvariable declarations of the parameterizedstructtype, only\n38 required annotations.\nThe cost of porting a program to use dynamic regions was\nalso reasonable; in this case roughly 13% of the total differ-\nences were region-related. For the web server, we were able\nto eliminate heap allocation entirely. Because it is event-\nLOCprotornewregion\nstring.h1395700\nstring-max.h13913500\nstring.cyc73968142\nlist.h3648500\nlist-max.h36417100\nlist.cyc81974380\nTable 2: Region annotations in libraries\ndriven, handling each request as it comes in, we changed\nthe main handler function to create a dynamic region and\nthen pass the region handle to its subroutines in a request\nstructure. After the request is serviced, the region is freed.\nThe majority of the overall changes arose from moving global\nvariables into the request structure and adding the structure\nas a parameter to various functions. This request structure\nis parameterized by a region, so many of the functions need\nannotations to connect the region of the request structure\nto that of another argument or return value.\nWe were less successful in regionizingcfrac.Asinthe\nweb server, we changed many functions to allocate using\nregion-handle parameters. It was easy to do dynamic region\nallocation and deallocation as part of the algorithm’s main\niteration, but for large inputs, it was difficult to keep regions\nfrom growing large before deallocation. We conclude that\ngarbage collection is a better match for this code, but others\nhave had more success with regions [12].\n6.2 Porting Library Code\nWe have ported a significant subset of the C and Caml\nlibraries to Cyclone. Two illustrative cases are the Cyclone\nlist and string libraries, ported from Caml and C respec-\ntively. Table 2 summarizes the region annotations in the in-\nterfaces and implementations of these libraries. As a rough\nmeasure of the effectiveness of default region annotations,\nwe also provide results for “maximally annotated” versions\nof the interfaces (list-max.h and string-max.h, respectively).\nTheprotocolumn lists the number of region type annota-\ntions that were necessary in function prototypes; thernew\ncolumn lists the number of uses ofrnew,andtheregioncol-\numn lists the number of uses of dynamic regions.\nWe found that library code requires more region annota-\ntions than application code, but most of these annotations\nare for the sake of convenience and generality rather than\nnecessity. Library functions that perform allocation often\ncome in two flavors: a heap allocating function that has the\nsame signature as the corresponding C or Caml function,\nand a version that takes an additional region handle for gen-\nerality; most annotations occur in the latter. Most of the\nchanges are to function prototypes; no explicit region anno-\ntations were necessary in the bodies of functions. The max-\nimally annotated interfaces require 2–2.4 times more region\nannotations; that is, the default region annotations suffice\n50–60% of the time. Most of the non-default region anno-\ntations were needed to express a “same-region” relationship\nbetween arguments and return types or to allow the func-\ntion to allocate into an arbitrary region; the remainder were\nneeded in type definitions. Moreover, no effect annotations\nwhatsoever were necessary.\nMost importantly, our applications, such as the compiler,\nuse the libraries extensively and region instantiation is im-\n\n290\n\nTestCtime(s)Cyclone time\nchecked(s)factorunchecked(s) factor\ncacm0.12±0.000.15±0.00 1.25×0.14±0.001.17×\ncfrac\n†\n2.30±0.005.57±0.01 2.42×4.77±0.012.07×\nfinger0.54±0.420.48±0.15 0.89×0.53±0.160.98×\ngrobner\n†\n0.03±0.000.07±0.00 2.85×0.07±0.002.49×\nhttpget0.32±0.030.33±0.02 1.03×0.32±0.061.00×\nhttpload\n†\n0.16±0.000.16±0.00 1.00×0.16±0.001.00×\nhttpping0.06±0.020.06±0.02 1.00×0.06±0.011.00×\nhttppost0.04±0.010.04±0.00 1.00×0.04±0.011.00×\nmatxmult1.37±0.001.50±0.00 1.09×1.37±0.001.00×\nminihttpd-1.15c2.05±0.002.09±0.00 1.02×2.09±0.001.02×\nncompress-4.2.40.14±0.010.19±0.00 1.36×0.18±0.001.29×\ntile\n†\n0.44±0.000.74±0.00 1.68×0.67±0.001.52×\n†\nCompiled with the garbage collector\nregionized benchmarks\ncfrac2.30±0.005.22±0.01 2.27×4.56±0.011.98×\nminihttpd2.30±0.002.35±0.00 1.02×2.35±0.001.02×\nTable 3: Benchmark performance\nplicit throughout them. The vast majority of library calls in\nported C code require no changes;malloc,realloc,memcpy,\netc., are essentially the only exceptions.\n6.3 Performance\nTable 3 shows the performance of the original C versions\nof our benchmark programs together with the Cyclone ver-\nsions with or without bounds-checks and null-checks. We\nran each benchmark twenty-one times on a 750 MHz Pen-\ntium III with 256MB of RAM, running Linux kernel 2.2.16-\n12, usinggcc2.96 as a back end. Thegccoptimization flags\nused for compiling both the original C code and the output\nof the Cyclone compiler were-O3 -march=i686.Because\nwe observed skewed distributions for the http benchmarks,\nwe report medians and semi-interquartile ranges (SIQR).\n1\nFor the non-web benchmarks (and some of the web bench-\nmarks) the median and mean were essentially identical, and\nthe standard deviation was at most 2% of the mean. The\nfactorcolumns for the Cyclone programs show the slowdown\nfactor relative to the C versions.\nWe achieve near-zero overhead for network or I/O bound\napplications such as the http clients and servers, but we pay\na substantial penalty for compute-intensive benchmarks; the\nworst isgrobner, which is almost a factor of three slower\nthan the C version. We have seen slowdowns of a factor of\nsix in pathological scenarios involving pointer arithmetic in\nsome microbenchmarks.\nTwo common sources of overhead in safe languages are\ngarbage collection and bounds checking. Garbage-collection\noverhead is not easy to measure in Cyclone, because re-\ngionizing a program can require significant work. As shown\nin Table 3, only a few of our benchmarks needed garbage\ncollection. Profiling the garbage collected version ofcfrac\nsuggests that garbage collection accounts for approximately\nhalf of its overhead. Partially regionizingcfracresulted\nin an 6% improvement. On the other hand,http_loadand\ntilemake relatively little use of dynamic allocation, so they\nhave almost no garbage-collection overhead. Therefore, we\n1\nThe semi-interquartile range is the difference between the high\nquartile and the low quartile divided by 2. This is a measure\nof variability, similar to standard deviation, recommended by\nJain [18] for skewed distributions.\nexpect that the overhead will vary widely for different pro-\ngrams depending on their memory-usage patterns.\nAs Table 3 demonstrates, bounds-checks are also an im-\nportant component of the overhead, but less than we ex-\npected. We found that a major cost is due to the repre-\nsentation of fat pointers. A fat pointer is represented with\nthree words: the base address, the bounds address, and the\ncurrent pointer location (essentially the same representation\nused by McGary’s bounded pointers [20]). The result is a\nlarger space overhead, largercache footprint, more parame-\nter passing and return-value copying, and increased register\npressure, especially on the register-impoverished x86.\nBecause fat pointers are currently the only pointer types\nin Cyclone that support pointer arithmetic and dynamically\nsized arrays, good fat-pointer performance is crucial to many\nCyclone programs. We found that slight changes to fat\npointer operations andgccflags relating to instruction selec-\ntion could have a huge impact on performance. In particular,\nreplacing inlined pointer operations with macros and setting\nthe architecture-specific instruction-selection flag properly\ndoubled the speed of some applications.\n7. RELATED WORK\nIn this paper, we have concentrated on the region-based\ntype system for Cyclone, which naturally supports C-style\nstack allocation, conventional heap allocation, and dynamic\nregion allocation. We feel that Cyclone is a unique and\npromising point in the programming-language design-space,\nbut many other systems share some features with Cyclone.\nMaking C Safe.Many systems, including but certainly\nnot limited to LCLint [10, 9], SLAM [3], Safe-C [2], and\nCCured [25], aim to make C code safe. Some of these sys-\ntems, such as LCLint, are meant to be static bug-finding\ntools. Like Cyclone, they usually require restricted coding\nidioms or additional annotations, but unlike Cyclone, they\noffer no soundness guarantees. In this way, these static tools\nreduce false positives. In contrast, Cyclone uses a combina-\ntion of a static type system (for memory management) and\nrun-time checks (for bounds violations) to minimize false\npositives.\n\n291\n\nOther systems, such as Safe-C and CCured, ensure sound-\nness by rewriting the code and adding run-time checks, at\nleast whenever an implementation-dependent static analy-\nsis cannot eliminate the checks. The primary advantage\nof these systems is that they require (almost) no changes\nto the C code, unlike Cyclone. However, they do not pre-\nserve the same data representations and lifetimes for ob-\njects. (Cyclone’sτ?pointers also use a wide representa-\ntion, but the use of these pointers is under programmer\ncontrol.) Furthermore, memory errors are caught at run\ntime instead of compile time. For instance, when an object\nis freed under CCured, the (entire) storage is not immedi-\nately reclaimed, but rather marked as inaccessible. Subse-\nquent accesses check the mark and signal an error when the\nobject is dereferenced. Ultimately, the mark is reclaimed\nwith a garbage collector to avoid leaks. Moreover, CCured\nmay move some stack-allocated objects to the heap to avoid\ndangling-pointer dereferences.\nStatic Regions.Tofte and Talpin’s seminal work [28] on\nimplementing ML with regions provides the foundation for\nregions in the ML Kit [27]. Programming with the Kit is\nconvenient, as the compiler automatically infers all region\nannotations. However, small changes to a program can have\ndrastic, unintuitive effects on object lifetimes. Thus, to pro-\ngram effectively, one must understand the analysis and try\nto control it indirectly by using certain idioms [27]. More\nrecent work for the ML Kit includes optional support for\ngarbage collection within regions [16].\nA number of extensions to the basic Tofte-Talpin frame-\nwork can avoid the constraints of LIFO region lifetimes. As\nexamples, the ML Kit includes a reset-region primitive [27];\nAiken et al. provide an analysis to free some regions early [1];\nand Walker et al. [29, 30] propose general systems for free-\ning regions based on linear types. All of these systems are\nmore expressive than our framework. For instance, the ideas\nin the Capability Calculus were used to implement type-safe\ngarbage collectorswithina language [31, 23]. However, these\nsystems were not designed for source-level programming.\nThey were designed as compiler intermediate languages or\nanalyses, so they can ignore issues such as minimizing an-\nnotations or providing control to the user.\nTwo other recent projects, Vault [7] and the work of Hen-\nglein et al. [17] aim to provide safe source-level control over\nmemory management using regions. Vault’s powerful type\nsystem allows a region to be freed before it leaves scope\nand its types can enforce that codemustfree a region. To\ndo so, Vault restricts region aliasing and tracks more fine-\ngrained effects. As a result, programming in Vault requires\nmore annotations. Nevertheless, we find Vault an extremely\npromising direction and hope to adapt some of these ideas to\nCyclone. Henglein et al. [17] have designed a flexible region\nsystem that does not require LIFO behavior. However, the\nsystem is monomorphic and first-order; it is unclear how to\nextend it to support polymorphism or existential types.\nFinally, both TAL [24] and the Microsoft CIL [13] provide\nsome support for type-safe stack allocation. But neither sys-\ntem allows programmers to mix stack and heap pointers, and\nboth systems place overly strong restrictions on how stack\npointers can be used. For instance, the Microsoft CIL pre-\nvents such pointers from being placed in data structures or\nreturned as results — features that language implementors\nneed for effective compilation [8].\nRegions in C.Perhaps the most closely related work is\nGay and Aiken’s RC [12] compiler and their earlier system,\nC@ [11]. As they note, region-based programming in C is an\nold idea; they contribute language support for efficient refer-\nence counting to detect if a region is deallocated while there\nremain pointers to it (that are not within it). This dynamic\nsystem has noapriorirestrictions on regions’ lifetimes and\na pointer can point anywhere, so the RC approach can en-\ncode more memory-management idioms. Like Cyclone, they\nprovide pointer annotations. These annotations are never\nrequired, but they are often crucial for performance because\nthey reduce the need for reference counting. One such an-\nnotation is very similar to our notion of region subtyping.\nRC uses reference counting only for dynamic regions. In\nfact, one annotation enforces that a pointer never points into\na dynamic region, so no reference counting is needed. As a\nresult, RC allows dangling pointers into the stack or heap.\nOther kinds of type errors also remain. Indeed, we found\na number of array-bounds bugs in two of the benchmarks\nused to evaluate RC:grobnerandtile. Finally, RC cannot\nsupport the kind of polymorphism that Cyclone does be-\ncause the RC compiler must know statically which objects\nare pointers.\nIn summary, some of these systems are more convenient\nto use than Cyclone (e.g., CCured and the MLKit) but take\naway control over memory management. Some of the static\nsystems (e.g., the Capability Calculus) provide more pow-\nerful region constructs, but were designed as intermediate\nlanguages and do not have the programming convenience of\nCyclone. Other systems (e.g., RC, Safe-C) are more flexible\nbut offer no static guarantees.\n8. FUTURE WORK\nA great deal of work remains to achieve our goals of pro-\nvidingatooltomovelegacycodetoatype-safeenvironment\neasily and providing a type-safe language for building sys-\ntems where control over data representations and memory\nmanagement is an issue.\nIn the near future, we hope to incorporate support for\ndeallocating dynamic regions early. We have experimented\nbriefly with linear type systems in the style of the Capability\nCalculus or Vault, but have found that this approach is gen-\nerally too restrictive, especially in the context of exceptions.\nInstead, we are currently developing a traditional intrapro-\ncedural flow analysis to track region aliasing and region life-\ntimes. Again, for the interprocedural case, we expect to add\nsupport for explicit annotations, and to use experimental\nevidence to drive the choice of defaults.\nWe also expect to incorporate better support for first-class\nregions, in the style of RC. The goal is to give programmers\na sufficient range of options that they can use the statically\nchecked regions most of the time, but fall back on the dy-\nnamically checked regions when needed.\nIn addition to enhancements to the region system, work is\nneeded in other areas. For instance, we have seen run-time\noverheads ranging from 1x to 3x for the benchmarks pre-\nsented here, and overheads as high as 6x for some compute-\nintensive microbenchmarks. We are currently working to\nidentify the bottlenecks, but a clear problem is with our\nrepresentation of pointers to dynamically sized arrays (?\npointers). To support dynamically sized arrays and bounds-\nchecks, we tag such arrays with implicit size information.\n\n292\n\nSimilarly, to support type-safe, discriminated unions, we\nadd implicit tags. We are adapting ideas from DML [33]\nand Xanadu [32] to make these tags explicit so that pro-\ngrammers can control where these tags are placed. We hope\ndoing so will make it easier to interface with legacy C code\nor devices that do not expect these tags on the data, and to\nsupport time-saving and space-saving optimizations. How-\never, we have found that the DML framework does not easily\nextend to imperative languages such as Cyclone. In partic-\nular, there are subtle issues involving existential types and\nthe address-of (&) operator [14].\nAcknowledgments\nWe would like to thank David Walker for fruitful discussions,\nand Steve Zdancewic and Jeff Vinocur for proofreading this\nmanuscript.\n9.REFERENCES\n[1] A. Aiken, M. F ̈ahndrich, and R. Levien. Better static\nmemory management: Improving region-based analysis of\nhigher-order languages. InACM Conference on\nProgramming Language Design and Implementation,pages\n174–185, La Jolla, CA, 1995.\n[2] T. M. Austin, S. E. Breach, and G. S. Sohi. Efficient\ndetection of all pointer and array access errors. InACM\nConference on Programming Language Design and\nImplementation, pages 290–301, Orlando, FL, June 1994.\n[3] T. Ball and S. K. Rajamani. Automatically validating\ntemporal safety properties of interfaces. InSPIN 2001,\nWorkshop on Model Checking of Software, volume 2057 of\nLecture Notes in Computer Science, pages 103–122,\nToronto, Canada, May 2001. Springer-Verlag.\n[4] H.-J. Boehm and M. Weiser. Garbage collection in an\nuncooperative environment.Software Practice and\nExperience, 18(9):807–820, 1988.\n[5] K. B. Bruce, L. Cardelli, and B. C. Pierce. Comparing\nobject encodings.Information and Computation,\n155:108–133, 1999.\n[6] Cyclone user’s manual. Technical Report 2001-1855,\nDepartment of Computer Science, Cornell University, Nov.\n2001. Current version at\nhttp://www.cs.cornell.edu/projects/cyclone/.\n[7] R. DeLine and M. F ̈ahndrich. Enforcing high-level\nprotocols in low-level software. InACM Conference on\nProgramming Language Design and Implementation,pages\n59–69, Snowbird, UT, June 2001.\n[8] T. Dowd, F. Henderson, and P. Ross. Compiling Mercury\nto the .NET common language runtime. In N. Benton and\nA. Kennedy, editors,BABEL’01: First International\nWorkshop on Multi-Language Infrastructure and\nInteroperability,volume59.1ofElectronic Notes in\nTheoretical Computer Science, Florence, Italy, Sept. 2001.\n[9] D. Evans. LCLint user’s guide.\nhttp://lclint.cs.virginia.edu/guide/.\n[10] D. Evans. Static detection of dynamic memory errors. In\nACM Conference on Programming Language Design and\nImplementation, pages 44–53, Philadelphia, PA, May 1996.\n[11] D. Gay and A. Aiken. Memory management with explicit\nregions. InACM Conference on Programming Language\nDesign and Implementation, pages 313–323, Montreal,\nCanada, June 1998.\n[12] D. Gay and A. Aiken. Language support for regions. In\nACM Conference on Programming Language Design and\nImplementation, pages 70–80, Snowbird, UT, June 2001.\n[13] A. D. Gordon and D. Syme. Typing a multi-language\nintermediate code. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 248–260,\nLondon, United Kingdom, Jan. 2001.\n[14] D. Grossman. Existential types for imperative languages. In\nEleventh European Symposium on Programming,pages\n21–35, Grenoble, France, Apr. 2002.\n[15] D.Grossman,G.Morrisett,Y.Wang,T.Jim,M.Hicks,\nand J. Cheney. Formal type soundness for Cyclone’s region\nsystem. Technical Report 2001-1856, Department of\nComputer Science, Cornell University, Nov. 2001.\n[16] N. Hallenberg, M. Elsman, and M. Tofte. Combining region\ninference and garbage collection. InACM Conference on\nProgramming Language Design and Implementation,\nBerlin, Germany, June 2002. This volume.\n[17] F. Henglein, H. Makholm, and H. Niss. A direct approach\nto control-flow sensitive region-based memory management.\nInThird International Conference on Principles and\nPractice of Declarative Programming, Florence, Italy, Sept.\n2001.\n[18] R. Jain.The Art of Computer Systems Performance\nAnalysis. Wiley, 1991.\n[19] T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney,\nand Y. Wang. Cyclone: A safe dialect of C. InUSENIX\nAnnual Technical Conference, Monterey, CA, June 2002.\n[20] G. McGary. Bounds checking projects.http:\n//www.gnu.org/software/gcc/projects/bp/main.html.\n[21] Y. Minamide, G. Morrisett, and R. Harper. Typed closure\nconversion. InTwenty-Third ACM Symposium on\nPrinciples of Programming Languages, pages 271–283, St.\nPetersburg, FL, Jan. 1996.\n[22] J. Mitchell and G. Plotkin. Abstract types have existential\ntype.ACM Transactions on Progamming Languages and\nSystems, 10(3):470–502, 1988. Preliminary version in\nTwelfth ACM Symposium on Principles of Programming\nLanguages, 1985.\n[23] S. Monnier, B. Saha, and Z. Shao. Principled scavenging. In\nACM Conference on Programming Language Design and\nImplementation, pages 81–91, Snowbird, UT, June 2001.\n[24] G. Morrisett, K. Crary, N. Glew, and D. Walker.\nStack-based typed assembly language. InWorkshop on\nTypes in Compilation, volume 1473 ofLecture Notes in\nComputer Science, pages 28–52, Kyoto, Japan, Mar. 1998.\nSpringer-Verlag.\n[25] G. C. Necula, S. McPeak, and W. Weimer. CCured:\nType-safe retrofitting of legacy code. InTwenty-Ninth\nACM Symposium on Principles of Programming\nLanguages, pages 128–139, Portland, OR, Jan. 2002.\n[26] M. Tofte and L. Birkedal. A region inference algorithm.\nACM Transactions on Progamming Languages and\nSystems, 20(4):734–767, July 1998.\n[27] M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H.\nOlesen, and P. Sestoft. Programming with regions in the\nML Kit (for version 4). Technical report, IT University of\nCopenhagen, Sept. 2001.\n[28] M. Tofte and J.-P. Talpin. Region-based memory\nmanagement.Information and Computation,\n132(2):109–176, 1997.\n[29] D. Walker, K. Crary, and G. Morrisett. Typed memory\nmanagement in a calculus of capabilities.ACM\nTransactions on Progamming Languages and Systems,\n24(4):701–771, July 2000.\n[30] D. Walker and K. Watkins. On regions and linear types. In\nSixth ACM International Conference on Functional\nProgramming, pages 181–192, Florence, Italy, Sept. 2001.\n[31] D. C. Wang and A. W. Appel. Type-preserving garbage\ncollectors. InTwenty-Eighth ACM Symposium on\nPrinciples of Programming Languages, pages 166–178,\nLondon, United Kingdom, Jan. 2001.\n[32] H. Xi. Imperative programming with dependent types. In\nFifteenth IEEE Symposium on Logic in Computer Science,\npages 375–387, Santa Barbara, CA, June 2000.\n[33] H. Xi and F. Pfenning. Dependent types in practical\nprogramming. InTwenty-Sixth ACM Symposium on\nPrinciples of Programming Languages, pages 214–227, San\nAntonio, TX, Jan. 1999.\n\n293", + "dataFromCrossref": { + "indexed": { + "date-parts": [ + [ + 2024, + 1, + 29 + ] + ], + "date-time": "2024-01-29T15:59:19Z", + "timestamp": 1706543959870 + }, + "publisher-location": "New York, NY, USA", + "reference-count": 32, + "publisher": "ACM", + "content-domain": { + "domain": [ + "dl.acm.org" + ], + "crossmark-restriction": true + }, + "published-print": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "DOI": "10.1145/512529.512563", + "type": "proceedings-article", + "created": { + "date-parts": [ + [ + 2004, + 4, + 19 + ] + ], + "date-time": "2004-04-19T17:18:43Z", + "timestamp": 1082395123000 + }, + "update-policy": "http://dx.doi.org/10.1145/crossmark-policy", + "source": "Crossref", + "is-referenced-by-count": 229, + "title": "Region-based memory management in cyclone", + "prefix": "10.1145", + "author": [ + { + "given": "Dan", + "family": "Grossman", + "sequence": "first", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Greg", + "family": "Morrisett", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Trevor", + "family": "Jim", + "sequence": "additional", + "affiliation": [ + { + "name": "AT&T Labs Research, Florham Park, NJ" + } + ] + }, + { + "given": "Michael", + "family": "Hicks", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "Yanling", + "family": "Wang", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + }, + { + "given": "James", + "family": "Cheney", + "sequence": "additional", + "affiliation": [ + { + "name": "Cornell University, Ithaca, NY" + } + ] + } + ], + "member": "320", + "published-online": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "reference": [ + { + "key": "e_1_3_2_1_1_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/207110.207137" + }, + { + "key": "e_1_3_2_1_2_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/178243.178446" + }, + { + "key": "e_1_3_2_1_3_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/380921.380932" + }, + { + "key": "e_1_3_2_1_4_1", + "doi-asserted-by": "publisher", + "DOI": "10.1002/spe.4380180902" + }, + { + "key": "e_1_3_2_1_5_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1999.2829" + }, + { + "key": "e_1_3_2_1_6_1", + "volume-title": "Technical Report 2001-1855", + "year": "2001", + "unstructured": "Cyclone user's manual. Technical Report 2001-1855 , Department of Computer Science , Cornell University , Nov. 2001 . Current version at http://www.cs.cornell.edu/projects/cyclone/ Cyclone user's manual. Technical Report 2001-1855, Department of Computer Science, Cornell University, Nov. 2001. Current version at http://www.cs.cornell.edu/projects/cyclone/" + }, + { + "key": "e_1_3_2_1_7_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378811" + }, + { + "key": "e_1_3_2_1_8_1", + "volume-title": "BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability", + "volume": "59", + "author": "Dowd T.", + "year": "2001", + "unstructured": "T. Dowd , F. Henderson , and P. Ross . Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors , BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability , volume 59 .1 of Electronic Notes in Theoretical Computer Science, Florence, Italy , Sept. 2001 T. Dowd, F. Henderson, and P. Ross. Compiling Mercury to the .NET common language runtime. In N. Benton and A. Kennedy, editors, BABEL'01: First International Workshop on Multi-Language Infrastructure and Interoperability, volume 59.1 of Electronic Notes in Theoretical Computer Science, Florence, Italy, Sept. 2001" + }, + { + "key": "e_1_3_2_1_9_1", + "unstructured": "D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/ D. Evans. LCLint user's guide. http://lclint.cs.virginia.edu/guide/" + }, + { + "key": "e_1_3_2_1_10_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/231379.231389" + }, + { + "key": "e_1_3_2_1_11_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/277650.277748" + }, + { + "key": "e_1_3_2_1_12_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378815" + }, + { + "key": "e_1_3_2_1_13_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360228" + }, + { + "key": "e_1_3_2_1_14_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/645396.651967" + }, + { + "key": "e_1_3_2_1_16_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/512529.512547" + }, + { + "key": "e_1_3_2_1_17_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/773184.773203" + }, + { + "key": "e_1_3_2_1_18_1", + "volume-title": "The Art of Computer Systems Performance Analysis", + "author": "Jain R.", + "year": "1991", + "unstructured": "R. Jain . The Art of Computer Systems Performance Analysis . Wiley , 1991 R. Jain. The Art of Computer Systems Performance Analysis. Wiley, 1991" + }, + { + "key": "e_1_3_2_1_19_1", + "volume-title": "USENIX Annual Technical Conference", + "author": "Jim T.", + "year": "2002", + "unstructured": "T. Jim , G. Morrisett , D. Grossman , M. Hicks , J. Cheney , and Y. Wang . Cyclone: A safe dialect of C . In USENIX Annual Technical Conference , Monterey, CA , June 2002 T. Jim, G. Morrisett, D. Grossman, M. Hicks, J. Cheney, and Y. Wang. Cyclone: A safe dialect of C. In USENIX Annual Technical Conference, Monterey, CA, June 2002" + }, + { + "key": "e_1_3_2_1_20_1", + "unstructured": "G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html G. McGary. Bounds checking projects. http://www.gnu.org/software/gcc/projects/bp/main.html" + }, + { + "key": "e_1_3_2_1_21_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/237721.237791" + }, + { + "key": "e_1_3_2_1_22_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/44501.45065" + }, + { + "key": "e_1_3_2_1_23_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/378795.378817" + }, + { + "key": "e_1_3_2_1_24_1", + "doi-asserted-by": "publisher", + "DOI": "10.5555/647228.719245" + }, + { + "key": "e_1_3_2_1_25_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/503272.503286" + }, + { + "key": "e_1_3_2_1_26_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/291891.291894" + }, + { + "key": "e_1_3_2_1_27_1", + "volume-title": "Programming with regions in the ML Kit (for version 4). Technical report", + "author": "Tofte M.", + "year": "2001", + "unstructured": "M. Tofte , L. Birkedal , M. Elsman , N. Hallenberg , T. H. Olesen , and P. Sestoft . Programming with regions in the ML Kit (for version 4). Technical report , IT University of Copenhagen , Sept. 2001 M. Tofte, L. Birkedal, M. Elsman, N. Hallenberg, T. H. Olesen, and P. Sestoft. Programming with regions in the ML Kit (for version 4). Technical report, IT University of Copenhagen, Sept. 2001" + }, + { + "key": "e_1_3_2_1_28_1", + "doi-asserted-by": "publisher", + "DOI": "10.1006/inco.1996.2613" + }, + { + "key": "e_1_3_2_1_29_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/363911.363923" + }, + { + "key": "e_1_3_2_1_30_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/507635.507658" + }, + { + "key": "e_1_3_2_1_31_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/360204.360218" + }, + { + "key": "e_1_3_2_1_32_1", + "first-page": "375", + "volume-title": "Fifteenth IEEE Symposium on Logic in Computer Science", + "author": "Xi H.", + "year": "2000", + "unstructured": "H. Xi . Imperative programming with dependent types . In Fifteenth IEEE Symposium on Logic in Computer Science , pages 375 -- 387 , Santa Barbara, CA , June 2000 H. Xi. Imperative programming with dependent types. In Fifteenth IEEE Symposium on Logic in Computer Science, pages 375--387, Santa Barbara, CA, June 2000" + }, + { + "key": "e_1_3_2_1_33_1", + "doi-asserted-by": "publisher", + "DOI": "10.1145/292540.292560" + } + ], + "event": "PLDI02: ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation", + "container-title": "Proceedings of the ACM SIGPLAN 2002 conference on Programming language design and implementation", + "original-title": [], + "link": [ + { + "URL": "https://dl.acm.org/doi/pdf/10.1145/512529.512563", + "content-type": "unspecified", + "content-version": "vor", + "intended-application": "similarity-checking" + } + ], + "deposited": { + "date-parts": [ + [ + 2023, + 9, + 4 + ] + ], + "date-time": "2023-09-04T21:19:02Z", + "timestamp": 1693862342000 + }, + "score": 1, + "resource": { + "primary": { + "URL": "https://dl.acm.org/doi/10.1145/512529.512563" + } + }, + "subtitle": [], + "short-title": [], + "issued": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "references-count": 32, + "alternative-id": [ + "10.1145/512529.512563", + "10.1145/512529" + ], + "URL": "http://dx.doi.org/10.1145/512529.512563", + "relation": { + "is-identical-to": [ + { + "id-type": "doi", + "id": "10.1145/543552.512563", + "asserted-by": "object" + } + ] + }, + "published": { + "date-parts": [ + [ + 2002, + 5, + 17 + ] + ] + }, + "assertion": [ + { + "value": "2002-05-17", + "order": 2, + "name": "published", + "label": "Published", + "group": { + "name": "publication_history", + "label": "Publication History" + } + } + ] + } + }, + "arxiv_1704.04861": { + "path": [ + "mobilenet.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nMobileNets: Efficient Convolutional Neural Networks for Mobile Vision\nApplications\nAndrew G. HowardMenglong ZhuBo ChenDmitry Kalenichenko\nWeijun WangTobias WeyandMarco AndreettoHartwig Adam\nGoogle Inc.\n{howarda,menglong,bochen,dkalenichenko,weijunw,weyand,anm,hadam}@google.com\nAbstract\nWe present a class of efficient models called MobileNets\nfor mobile and embedded vision applications. MobileNets\nare based on a streamlined architecture that uses depth-\nwise separable convolutions to build light weight deep\nneural networks. We introduce two simple global hyper-\nparameters that efficiently trade off between latency and\naccuracy. These hyper-parameters allow the model builder\nto choose the right sized model for their application based\non the constraints of the problem. We present extensive\nexperiments on resource and accuracy tradeoffs and show\nstrong performance compared to other popular models on\nImageNet classification. We then demonstrate the effective-\nness of MobileNets across a wide range of applications and\nuse cases including object detection, finegrain classifica-\ntion, face attributes and large scale geo-localization.\n1. Introduction\nConvolutional neural networks have become ubiquitous\nin computer vision ever since AlexNet [19] popularized\ndeep convolutional neural networks by winning the Ima-\ngeNet Challenge: ILSVRC 2012 [24]. The general trend\nhas been to make deeper and more complicated networks\nin order to achieve higher accuracy [27, 31, 29, 8]. How-\never, these advances to improve accuracy are not necessar-\nily making networks more efficient with respect to size and\nspeed. In many real world applications such as robotics,\nself-driving car and augmented reality, the recognition tasks\nneed to be carried out in a timely fashion on a computation-\nally limited platform.\nThis paper describes an efficient network architecture\nand a set of two hyper-parameters in order to build very\nsmall, low latency models that can be easily matched to the\ndesign requirements for mobile and embedded vision ap-\nplications. Section 2 reviews prior work in building small\nmodels. Section 3 describes the MobileNet architecture and\ntwo hyper-parameters width multiplier and resolution mul-\ntiplier to define smaller and more efficient MobileNets. Sec-\ntion 4 describes experiments on ImageNet as well a variety\nof different applications and use cases. Section 5 closes\nwith a summary and conclusion.\n2. Prior Work\nThere has been rising interest in building small and effi-\ncient neural networks in the recent literature, e.g. [16, 34,\n12, 36, 22]. Many different approaches can be generally\ncategorized into either compressing pretrained networks or\ntraining small networks directly. This paper proposes a\nclass of network architectures that allows a model devel-\noper to specifically choose a small network that matches\nthe resource restrictions (latency, size) for their application.\nMobileNets primarily focus on optimizing for latency but\nalso yield small networks. Many papers on small networks\nfocus only on size but do not consider speed.\nMobileNets are built primarily from depthwise separable\nconvolutions initially introduced in [26] and subsequently\nused in Inception models [13] to reduce the computation in\nthe first few layers. Flattened networks [16] build a network\nout of fully factorized convolutions and showed the poten-\ntial of extremely factorized networks. Independent of this\ncurrent paper, Factorized Networks[34] introduces a similar\nfactorized convolution as well as the use of topological con-\nnections. Subsequently, the Xception network [3] demon-\nstrated how to scale up depthwise separable filters to out\nperform Inception V3 networks. Another small network is\nSqueezenet [12] which uses a bottleneck approach to design\na very small network. Other reduced computation networks\ninclude structured transform networks [28] and deep fried\nconvnets [37].\nA different approach for obtaining small networks is\nshrinking, factorizing or compressing pretrained networks.\nCompression based on product quantization [36], hashing\n1\narXiv:1704.04861v1 [cs.CV] 17 Apr 2017\n\nProprietary + Confidential\nLandmark Recognition\nFinegrain Classification\nObject Detection\nMobileNets\nPhoto by Sharon VanderKaay (CC BY 2.0)\nPhoto by Juanedc (CC BY 2.0)\nPhoto by HarshLight (CC BY 2.0)\nFace Attributes\nGoogle Doodle by Sarah Harrison\nFigure 1. MobileNet models can be applied to various recognition tasks for efficient on device intelligence.\n[2], and pruning, vector quantization and Huffman coding\n[5] have been proposed in the literature. Additionally var-\nious factorizations have been proposed to speed up pre-\ntrained networks [14, 20]. Another method for training\nsmall networks is distillation [9] which uses a larger net-\nwork to teach a smaller network. It is complementary to\nour approach and is covered in some of our use cases in\nsection 4. Another emerging approach is low bit networks\n[4, 22, 11].\n3. MobileNet Architecture\nIn this section we first describe the core layers that Mo-\nbileNet is built on which are depthwise separable filters.\nWe then describe the MobileNet network structure and con-\nclude with descriptions of the two model shrinking hyper-\nparameters width multiplier and resolution multiplier.\n3.1. Depthwise Separable Convolution\nThe MobileNet model is based on depthwise separable\nconvolutions which is a form of factorized convolutions\nwhich factorize a standard convolution into a depthwise\nconvolution and a1×1convolution called a pointwise con-\nvolution. For MobileNets the depthwise convolution ap-\nplies a single filter to each input channel. The pointwise\nconvolution then applies a1×1convolution to combine the\noutputs the depthwise convolution. A standard convolution\nboth filters and combines inputs into a new set of outputs\nin one step. The depthwise separable convolution splits this\ninto two layers, a separate layer for filtering and a separate\nlayer for combining. This factorization has the effect of\ndrastically reducing computation and model size. Figure 2\nshows how a standard convolution 2(a) is factorized into a\ndepthwise convolution 2(b) and a1×1pointwise convolu-\ntion 2(c).\nA standard convolutional layer takes as input aD\nF\n×\nD\nF\n×Mfeature mapFand produces aD\nF\n×D\nF\n×N\nfeature mapGwhereD\nF\nis the spatial width and height\nof a square input feature map\n1\n,Mis the number of input\nchannels (input depth),D\nG\nis the spatial width and height of\na square output feature map andNis the number of output\nchannel (output depth).\nThe standard convolutional layer is parameterized by\nconvolution kernelKof sizeD\nK\n×D\nK\n×M×NwhereD\nK\nis the spatial dimension of the kernel assumed to be square\nandMis number of input channels andNis the number of\noutput channels as defined previously.\nThe output feature map for standard convolution assum-\ning stride one and padding is computed as:\nG\nk,l,n\n=\n∑\ni,j,m\nK\ni,j,m,n\n·F\nk+i−1,l+j−1,m\n(1)\nStandard convolutions have the computational cost of:\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n(2)\nwhere the computational cost depends multiplicatively on\nthe number of input channelsM, the number of output\nchannelsNthe kernel sizeD\nk\n×D\nk\nand the feature map\nsizeD\nF\n×D\nF\n. MobileNet models address each of these\nterms and their interactions. First it uses depthwise separa-\nble convolutions to break the interaction between the num-\nber of output channels and the size of the kernel.\nThe standard convolution operation has the effect of fil-\ntering features based on the convolutional kernels and com-\nbining features in order to produce a new representation.\nThe filtering and combination steps can be split into two\nsteps via the use of factorized convolutions called depthwise\n1\nWe assume that the output feature map has the same spatial dimen-\nsions as the input and both feature maps are square. Our model shrinking\nresults generalize to feature maps with arbitrary sizes and aspect ratios.\n\nseparable convolutions for substantial reduction in compu-\ntational cost.\nDepthwise separable convolution are made up of two\nlayers: depthwise convolutions and pointwise convolutions.\nWe use depthwise convolutions to apply a single filter per\neach input channel (input depth). Pointwise convolution, a\nsimple1×1convolution, is then used to create a linear com-\nbination of the output of the depthwise layer. MobileNets\nuse both batchnorm and ReLU nonlinearities for both lay-\ners.\nDepthwise convolution with one filter per input channel\n(input depth) can be written as:\nˆ\nG\nk,l,m\n=\n∑\ni,j\nˆ\nK\ni,j,m\n·F\nk+i−1,l+j−1,m\n(3)\nwhere\nˆ\nKis the depthwise convolutional kernel of size\nD\nK\n×D\nK\n×Mwhere them\nth\nfilter in\nˆ\nKis applied to\nthem\nth\nchannel inFto produce them\nth\nchannel of the\nfiltered output feature map\nˆ\nG.\nDepthwise convolution has a computational cost of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n(4)\nDepthwise convolution is extremely efficient relative to\nstandard convolution. However it only filters input chan-\nnels, it does not combine them to create new features. So\nan additional layer that computes a linear combination of\nthe output of depthwise convolution via1×1convolution\nis needed in order to generate these new features.\nThe combination of depthwise convolution and1×1\n(pointwise) convolution is called depthwise separable con-\nvolution which was originally introduced in [26].\nDepthwise separable convolutions cost:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\n(5)\nwhich is the sum of the depthwise and1×1pointwise con-\nvolutions.\nBy expressing convolution as a two step process of filter-\ning and combining we get a reduction in computation of:\nD\nK\n·D\nK\n·M·D\nF\n·D\nF\n+M·N·D\nF\n·D\nF\nD\nK\n·D\nK\n·M·N·D\nF\n·D\nF\n=\n1\nN\n+\n1\nD\n2\nK\nMobileNet uses3×3depthwise separable convolutions\nwhich uses between 8 to 9 times less computation than stan-\ndard convolutions at only a small reduction in accuracy as\nseen in Section 4.\nAdditional factorization in spatial dimension such as in\n[16, 31] does not save much additional computation as very\nlittle computation is spent in depthwise convolutions.\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(a) Standard Convolution Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(b) Depthwise Convolutional Filters\n...\n...\n...\nM\nM\nM\nD\nK\nD\nK\nD\nK\nD\nK\nN\nN\n1\n1\n1\n(c)1×1Convolutional Filters called Pointwise Convolution in the con-\ntext of Depthwise Separable Convolution\nFigure 2. The standard convolutional filters in (a) are replaced by\ntwo layers: depthwise convolution in (b) and pointwise convolu-\ntion in (c) to build a depthwise separable filter.\n3.2. Network Structure and Training\nThe MobileNet structure is built on depthwise separable\nconvolutions as mentioned in the previous section except for\nthe first layer which is a full convolution. By defining the\nnetwork in such simple terms we are able to easily explore\nnetwork topologies to find a good network. The MobileNet\narchitecture is defined in Table 1. All layers are followed by\na batchnorm [13] and ReLU nonlinearity with the exception\nof the final fully connected layer which has no nonlinearity\nand feeds into a softmax layer for classification. Figure 3\ncontrasts a layer with regular convolutions, batchnorm and\nReLU nonlinearity to the factorized layer with depthwise\nconvolution,1×1pointwise convolution as well as batch-\nnorm and ReLU after each convolutional layer. Down sam-\npling is handled with strided convolution in the depthwise\nconvolutions as well as in the first layer. A final average\npooling reduces the spatial resolution to 1 before the fully\nconnected layer. Counting depthwise and pointwise convo-\nlutions as separate layers, MobileNet has 28 layers.\nIt is not enough to simply define networks in terms of a\nsmall number of Mult-Adds. It is also important to make\nsure these operations can be efficiently implementable. For\n\n3x3 Depthwise Conv\nBN\n1x1 Conv\nBN\nReLU\nReLU\n3x3 Conv\nBN\nReLU\nFigure 3. Left: Standard convolutional layer with batchnorm and\nReLU. Right: Depthwise Separable convolutions with Depthwise\nand Pointwise layers followed by batchnorm and ReLU.\ninstance unstructured sparse matrix operations are not typ-\nically faster than dense matrix operations until a very high\nlevel of sparsity. Our model structure puts nearly all of the\ncomputation into dense1×1convolutions. This can be im-\nplemented with highly optimized general matrix multiply\n(GEMM) functions. Often convolutions are implemented\nby a GEMM but require an initial reordering in memory\ncalled im2col in order to map it to a GEMM. For instance,\nthis approach is used in the popular Caffe package [15].\n1×1convolutions do not require this reordering in memory\nand can be implemented directly with GEMM which is one\nof the most optimized numerical linear algebra algorithms.\nMobileNet spends95%of it’s computation time in1×1\nconvolutions which also has75%of the parameters as can\nbe seen in Table 2. Nearly all of the additional parameters\nare in the fully connected layer.\nMobileNet models were trained in TensorFlow [1] us-\ning RMSprop [33] with asynchronous gradient descent sim-\nilar to Inception V3 [31]. However, contrary to training\nlarge models we use less regularization and data augmen-\ntation techniques because small models have less trouble\nwith overfitting. When training MobileNets we do not use\nside heads or label smoothing and additionally reduce the\namount image of distortions by limiting the size of small\ncrops that are used in large Inception training [31]. Addi-\ntionally, we found that it was important to put very little or\nno weight decay (l2 regularization) on the depthwise filters\nsince their are so few parameters in them. For the ImageNet\nbenchmarks in the next section all models were trained with\nsame training parameters regardless of the size of the model.\n3.3. Width Multiplier: Thinner Models\nAlthough the base MobileNet architecture is already\nsmall and low latency, many times a specific use case or\napplication may require the model to be smaller and faster.\nIn order to construct these smaller and less computationally\nexpensive models we introduce a very simple parameterα\ncalled width multiplier. The role of the width multiplierαis\nto thin a network uniformly at each layer. For a given layer\nTable 1. MobileNet Body Architecture\nType / StrideFilter ShapeInput Size\nConv / s23×3×3×32224×224×3\nConv dw / s13×3×32dw112×112×32\nConv / s11×1×32×64112×112×32\nConv dw / s23×3×64dw112×112×64\nConv / s11×1×64×12856×56×64\nConv dw / s13×3×128dw56×56×128\nConv / s11×1×128×12856×56×128\nConv dw / s23×3×128dw56×56×128\nConv / s11×1×128×25628×28×128\nConv dw / s13×3×256dw28×28×256\nConv / s11×1×256×25628×28×256\nConv dw / s23×3×256dw28×28×256\nConv / s11×1×256×51214×14×256\n5×\nConv dw / s13×3×512dw14×14×512\nConv / s11×1×512×51214×14×512\nConv dw / s23×3×512dw14×14×512\nConv / s11×1×512×10247×7×512\nConv dw / s23×3×1024dw7×7×1024\nConv / s11×1×1024×10247×7×1024\nAvg Pool / s1Pool7×77×7×1024\nFC / s11024×10001×1×1024\nSoftmax / s1Classifier1×1×1000\nTable 2. Resource Per Layer Type\nTypeMult-AddsParameters\nConv1×194.86%74.59%\nConv DW3×33.06%1.06%\nConv3×31.19%0.02%\nFully Connected0.18%24.33%\nand width multiplierα, the number of input channelsMbe-\ncomesαMand the number of output channelsNbecomes\nαN.\nThe computational cost of a depthwise separable convo-\nlution with width multiplierαis:\nD\nK\n·D\nK\n·αM·D\nF\n·D\nF\n+αM·αN·D\nF\n·D\nF\n(6)\nwhereα∈(0,1]with typical settings of 1, 0.75, 0.5 and\n0.25.α= 1is the baseline MobileNet andα <1are\nreduced MobileNets. Width multiplier has the effect of re-\nducing computational cost and the number of parameters\nquadratically by roughlyα\n2\n. Width multiplier can be ap-\nplied to any model structure to define a new smaller model\nwith a reasonable accuracy, latency and size trade off. It\nis used to define a new reduced structure that needs to be\ntrained from scratch.\n3.4. Resolution Multiplier: Reduced Representa-\ntion\nThe second hyper-parameter to reduce the computational\ncost of a neural network is a resolution multiplierρ. We ap-\n\nTable 3. Resource usage for modifications to standard convolution.\nNote that each row is a cumulative effect adding on top of the\nprevious row. This example is for an internal MobileNet layer\nwithD\nK\n= 3,M= 512,N= 512,D\nF\n= 14.\nLayer/ModificationMillionMillion\nMult-AddsParameters\nConvolution4622.36\nDepthwise Separable Conv52.30.27\nα= 0.7529.60.15\nρ= 0.71415.10.15\nply this to the input image and the internal representation of\nevery layer is subsequently reduced by the same multiplier.\nIn practice we implicitly setρby setting the input resolu-\ntion.\nWe can now express the computational cost for the core\nlayers of our network as depthwise separable convolutions\nwith width multiplierαand resolution multiplierρ:\nD\nK\n·D\nK\n·αM·ρD\nF\n·ρD\nF\n+αM·αN·ρD\nF\n·ρD\nF\n(7)\nwhereρ∈(0,1]which is typically set implicitly so that\nthe input resolution of the network is 224, 192, 160 or 128.\nρ= 1is the baseline MobileNet andρ <1are reduced\ncomputation MobileNets. Resolution multiplier has the ef-\nfect of reducing computational cost byρ\n2\n.\nAs an example we can look at a typical layer in Mo-\nbileNet and see how depthwise separable convolutions,\nwidth multiplier and resolution multiplier reduce the cost\nand parameters. Table 3 shows the computation and number\nof parameters for a layer as architecture shrinking methods\nare sequentially applied to the layer. The first row shows\nthe Mult-Adds and parameters for a full convolutional layer\nwith an input feature map of size14×14×512with a ker-\nnelKof size3×3×512×512. We will look in detail\nin the next section at the trade offs between resources and\naccuracy.\n4. Experiments\nIn this section we first investigate the effects of depth-\nwise convolutions as well as the choice of shrinking by re-\nducing the width of the network rather than the number of\nlayers. We then show the trade offs of reducing the net-\nwork based on the two hyper-parameters: width multiplier\nand resolution multiplier and compare results to a number\nof popular models. We then investigate MobileNets applied\nto a number of different applications.\n4.1. Model Choices\nFirst we show results for MobileNet with depthwise sep-\narable convolutions compared to a model built with full con-\nvolutions. In Table 4 we see that using depthwise separa-\nble convolutions compared to full convolutions only reduces\nTable 4. Depthwise Separable vs Full Convolution MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\nConv MobileNet71.7%486629.3\nMobileNet70.6%5694.2\nTable 5. Narrow vs Shallow MobileNet\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.75 MobileNet68.4%3252.6\nShallow MobileNet65.3%3072.9\nTable 6. MobileNet Width Multiplier\nWidth MultiplierImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n0.75 MobileNet-22468.4%3252.6\n0.5 MobileNet-22463.7%1491.3\n0.25 MobileNet-22450.6%410.5\nTable 7. MobileNet Resolution\nResolutionImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\n1.0 MobileNet-19269.1%4184.2\n1.0 MobileNet-16067.2%2904.2\n1.0 MobileNet-12864.4%1864.2\naccuracy by1%on ImageNet was saving tremendously on\nmult-adds and parameters.\nWe next show results comparing thinner models with\nwidth multiplier to shallower models using less layers. To\nmake MobileNet shallower, the5layers of separable filters\nwith feature size14×14×512in Table 1 are removed.\nTable 5 shows that at similar computation and number of\nparameters, that making MobileNets thinner is3%better\nthan making them shallower.\n4.2. Model Shrinking Hyperparameters\nTable 6 shows the accuracy, computation and size trade\noffs of shrinking the MobileNet architecture with the width\nmultiplierα. Accuracy drops off smoothly until the archi-\ntecture is made too small atα= 0.25.\nTable 7 shows the accuracy, computation and size trade\noffs for different resolution multipliers by training Mo-\nbileNets with reduced input resolutions. Accuracy drops\noff smoothly across resolution.\nFigure 4 shows the trade off between ImageNet Accu-\nracy and computation for the 16 models made from the\ncross product of width multiplierα∈ {1,0.75,0.5,0.25}\nand resolutions{224,192,160,128}. Results are log linear\nwith a jump when models get very small atα= 0.25.\n\nFigure 4. This figure shows the trade off between computation\n(Mult-Adds) and accuracy on the ImageNet benchmark. Note the\nlog linear dependence between accuracy and computation.\nFigure 5. This figure shows the trade off between the number of\nparameters and accuracy on the ImageNet benchmark. The colors\nencode input resolutions. The number of parameters do not vary\nbased on the input resolution.\nFigure 5 shows the trade off between ImageNet Ac-\ncuracy and number of parameters for the 16 models\nmade from the cross product of width multiplierα∈\n{1,0.75,0.5,0.25}and resolutions{224,192,160,128}.\nTable 8 compares full MobileNet to the original\nGoogleNet [30] and VGG16 [27]. MobileNet is nearly\nas accurate as VGG16 while being 32 times smaller and\n27 times less compute intensive. It is more accurate than\nGoogleNet while being smaller and more than 2.5 times less\ncomputation.\nTable 9 compares a reduced MobileNet with width mul-\ntiplierα= 0.5and reduced resolution160×160. Reduced\nMobileNet is4%better than AlexNet [19] while being45×\nsmaller and9.4×less compute than AlexNet. It is also4%\nbetter than Squeezenet [12] at about the same size and22×\nless computation.\nTable 8. MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n1.0 MobileNet-22470.6%5694.2\nGoogleNet69.8%15506.8\nVGG 1671.5%15300138\nTable 9. Smaller MobileNet Comparison to Popular Models\nModelImageNetMillionMillion\nAccuracyMult-AddsParameters\n0.50 MobileNet-16060.2%761.32\nSqueezenet57.5%17001.25\nAlexNet57.2%72060\nTable 10. MobileNet for Stanford Dogs\nModelTop-1MillionMillion\nAccuracyMult-AddsParameters\nInception V3 [18]84%500023.2\n1.0 MobileNet-22483.3%5693.3\n0.75 MobileNet-22481.9%3251.9\n1.0 MobileNet-19281.9%4183.3\n0.75 MobileNet-19280.5%2391.9\nTable 11. Performance of PlaNet using the MobileNet architec-\nture. Percentages are the fraction of the Im2GPS test dataset that\nwere localized within a certain distance from the ground truth. The\nnumbers for the original PlaNet model are based on an updated\nversion that has an improved architecture and training dataset.\nScaleIm2GPS [7] PlaNet [35]PlaNet\nMobileNet\nContinent (2500 km)51.9%77.6%79.3%\nCountry (750 km)35.4%64.0%60.3%\nRegion (200 km)32.1%51.1%45.2%\nCity (25 km)21.9%31.7%31.7%\nStreet (1 km)2.5%11.0%11.4%\n4.3. Fine Grained Recognition\nWe train MobileNet for fine grained recognition on the\nStanford Dogs dataset [17]. We extend the approach of [18]\nand collect an even larger but noisy training set than [18]\nfrom the web. We use the noisy web data to pretrain a fine\ngrained dog recognition model and then fine tune the model\non the Stanford Dogs training set. Results on Stanford Dogs\ntest set are in Table 10. MobileNet can almost achieve the\nstate of the art results from [18] at greatly reduced compu-\ntation and size.\n4.4. Large Scale Geolocalizaton\nPlaNet [35] casts the task of determining where on earth\na photo was taken as a classification problem. The approach\ndivides the earth into a grid of geographic cells that serve as\nthe target classes and trains a convolutional neural network\n\non millions of geo-tagged photos. PlaNet has been shown\nto successfully localize a large variety of photos and to out-\nperform Im2GPS [6, 7] that addresses the same task.\nWe re-train PlaNet using the MobileNet architecture on\nthe same data. While the full PlaNet model based on the In-\nception V3 architecture [31] has 52 million parameters and\n5.74 billion mult-adds. The MobileNet model has only 13\nmillion parameters with the usual 3 million for the body and\n10 million for the final layer and 0.58 Million mult-adds.\nAs shown in Tab. 11, the MobileNet version delivers only\nslightly decreased performance compared to PlaNet despite\nbeing much more compact. Moreover, it still outperforms\nIm2GPS by a large margin.\n4.5. Face Attributes\nAnother use-case for MobileNet is compressing large\nsystems with unknown or esoteric training procedures. In\na face attribute classification task, we demonstrate a syner-\ngistic relationship between MobileNet and distillation [9],\na knowledge transfer technique for deep networks. We\nseek to reduce a large face attribute classifier with75\nmillion parameters and1600million Mult-Adds.The\nclassifier is trained on a multi-attribute dataset similar to\nYFCC100M [32].\nWe distill a face attribute classifier using the MobileNet\narchitecture. Distillation [9] works by training the classi-\nfier to emulate the outputs of a larger model\n2\ninstead of the\nground-truth labels, hence enabling training from large (and\npotentially infinite) unlabeled datasets. Marrying the scal-\nability of distillation training and the parsimonious param-\neterization of MobileNet, the end system not only requires\nno regularization (e.g. weight-decay and early-stopping),\nbut also demonstrates enhanced performances. It is evi-\ndent from Tab. 12 that the MobileNet-based classifier is re-\nsilient to aggressive model shrinking: it achieves a similar\nmean average precision across attributes (mean AP) as the\nin-house while consuming only1%the Multi-Adds.\n4.6. Object Detection\nMobileNet can also be deployed as an effective base net-\nwork in modern object detection systems. We report results\nfor MobileNet trained for object detection on COCO data\nbased on the recent work that won the 2016 COCO chal-\nlenge [10]. In table 13, MobileNet is compared to VGG\nand Inception V2 [13] under both Faster-RCNN [23] and\nSSD [21] framework. In our experiments, SSD is evaluated\nwith 300 input resolution (SSD 300) and Faster-RCNN is\ncompared with both 300 and 600 input resolution (Faster-\nRCNN 300, Faster-RCNN 600). The Faster-RCNN model\nevaluates 300 RPN proposal boxes per image. The models\nare trained on COCO train+val excluding 8k minival images\n2\nThe emulation quality is measured by averaging the per-attribute\ncross-entropy over all attributes.\nTable 12. Face attribute classification using the MobileNet archi-\ntecture. Each row corresponds to a different hyper-parameter set-\nting (width multiplierαand image resolution).\nWidth Multiplier /MeanMillionMillion\nResolutionAPMult-Adds Parameters\n1.0 MobileNet-224 88.7%5683.2\n0.5 MobileNet-224 88.1%1490.8\n0.25 MobileNet-224 87.2%450.2\n1.0 MobileNet-128 88.1%1853.2\n0.5 MobileNet-128 87.7%480.8\n0.25 MobileNet-128 86.4%150.2\nBaseline86.9%16007.5\nTable 13. COCO object detection results comparison using differ-\nent frameworks and network architectures. mAP is reported with\nCOCO primary challenge metric (AP at IoU=0.50:0.05:0.95)\nFrameworkModelmAPBillionMillion\nResolutionMult-Adds Parameters\ndeeplab-VGG 21.1%34.933.1\nSSD 300Inception V2 22.0%3.813.7\nMobileNet19.3%1.26.8\nFaster-RCNNVGG22.9%64.3138.5\n300Inception V2 15.4%118.213.3\nMobileNet16.4%25.26.1\nFaster-RCNNVGG25.7%149.6138.5\n600Inception V2 21.9%129.613.3\nMobilenet19.8%30.56.1\nFigure 6. Example objection detection results using MobileNet\nSSD.\nand evaluated on minival. For both frameworks, MobileNet\nachieves comparable results to other networks with only a\nfraction of computational complexity and model size.\n4.7. Face Embeddings\nThe FaceNet model is a state of the art face recognition\nmodel [25]. It builds face embeddings based on the triplet\nloss. To build a mobile FaceNet model we use distillation\nto train by minimizing the squared differences of the output\n\nTable 14. MobileNet Distilled from FaceNet\nModel1e-4MillionMillion\nAccuracyMult-AddsParameters\nFaceNet [25]83%16007.5\n1.0 MobileNet-16079.4%2864.9\n1.0 MobileNet-12878.3%1855.5\n0.75 MobileNet-12875.2%1663.4\n0.75 MobileNet-12872.5%1083.8\nof FaceNet and MobileNet on the training data. Results for\nvery small MobileNet models can be found in table 14.\n5. Conclusion\nWe proposed a new model architecture called Mo-\nbileNets based on depthwise separable convolutions. We\ninvestigated some of the important design decisions leading\nto an efficient model. We then demonstrated how to build\nsmaller and faster MobileNets using width multiplier and\nresolution multiplier by trading off a reasonable amount of\naccuracy to reduce size and latency. We then compared dif-\nferent MobileNets to popular models demonstrating supe-\nrior size, speed and accuracy characteristics. We concluded\nby demonstrating MobileNet’s effectiveness when applied\nto a wide variety of tasks. As a next step to help adoption\nand exploration of MobileNets, we plan on releasing mod-\nels in Tensor Flow.\nReferences\n[1] M. Abadi, A. Agarwal, P. Barham, E. Brevdo, Z. Chen,\nC. Citro, G. S. Corrado, A. Davis, J. Dean, M. Devin, et al.\nTensorflow: Large-scale machine learning on heterogeneous\nsystems, 2015.Software available from tensorflow. org, 1,\n2015. 4\n[2] W. Chen, J. T. Wilson, S. Tyree, K. Q. Weinberger, and\nY. Chen. Compressing neural networks with the hashing\ntrick.CoRR, abs/1504.04788, 2015. 2\n[3] F. Chollet. Xception: Deep learning with depthwise separa-\nble convolutions.arXiv preprint arXiv:1610.02357v2, 2016.\n1\n[4] M. Courbariaux, J.-P. David, and Y. Bengio. Training deep\nneural networks with low precision multiplications.arXiv\npreprint arXiv:1412.7024, 2014. 2\n[5] S. Han, H. Mao, and W. J. Dally. Deep compression: Com-\npressing deep neural network with pruning, trained quantiza-\ntion and huffman coding.CoRR, abs/1510.00149, 2, 2015.\n2\n[6] J. Hays and A. Efros. IM2GPS: estimating geographic in-\nformation from a single image. InProceedings of the IEEE\nInternational Conference on Computer Vision and Pattern\nRecognition, 2008. 7\n[7] J. Hays and A. Efros. Large-Scale Image Geolocalization.\nIn J. Choi and G. Friedland, editors,Multimodal Location\nEstimation of Videos and Images. Springer, 2014. 6, 7\n[8] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learn-\ning for image recognition.arXiv preprint arXiv:1512.03385,\n2015. 1\n[9] G. Hinton, O. Vinyals, and J. Dean. Distilling the knowledge\nin a neural network.arXiv preprint arXiv:1503.02531, 2015.\n2, 7\n[10] J. Huang, V. Rathod, C. Sun, M. Zhu, A. Korattikara,\nA. Fathi, I. Fischer, Z. Wojna, Y. Song, S. Guadarrama, et al.\nSpeed/accuracy trade-offs for modern convolutional object\ndetectors.arXiv preprint arXiv:1611.10012, 2016. 7\n[11] I. Hubara, M. Courbariaux, D. Soudry, R. El-Yaniv, and\nY. Bengio. Quantized neural networks: Training neural net-\nworks with low precision weights and activations.arXiv\npreprint arXiv:1609.07061, 2016. 2\n[12] F. N. Iandola, M. W. Moskewicz, K. Ashraf, S. Han, W. J.\nDally, and K. Keutzer. Squeezenet: Alexnet-level accuracy\nwith 50x fewer parameters and¡ 1mb model size.arXiv\npreprint arXiv:1602.07360, 2016. 1, 6\n[13] S. Ioffe and C. Szegedy. Batch normalization: Accelerating\ndeep network training by reducing internal covariate shift.\narXiv preprint arXiv:1502.03167, 2015. 1, 3, 7\n[14] M. Jaderberg, A. Vedaldi, and A. Zisserman. Speeding up\nconvolutional neural networks with low rank expansions.\narXiv preprint arXiv:1405.3866, 2014. 2\n[15] Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Gir-\nshick, S. Guadarrama, and T. Darrell.Caffe: Convolu-\ntional architecture for fast feature embedding.arXiv preprint\narXiv:1408.5093, 2014. 4\n[16] J. Jin, A. Dundar, and E. Culurciello. Flattened convolutional\nneural networks for feedforward acceleration.arXiv preprint\narXiv:1412.5474, 2014. 1, 3\n[17] A. Khosla, N. Jayadevaprakash, B. Yao, and L. Fei-Fei.\nNovel dataset for fine-grained image categorization. InFirst\nWorkshop on Fine-Grained Visual Categorization, IEEE\nConference on Computer Vision and Pattern Recognition,\nColorado Springs, CO, June 2011. 6\n[18] J. Krause, B. Sapp, A. Howard, H. Zhou, A. Toshev,\nT. Duerig, J. Philbin, and L. Fei-Fei. The unreasonable ef-\nfectiveness of noisy data for fine-grained recognition.arXiv\npreprint arXiv:1511.06789, 2015. 6\n[19] A. Krizhevsky, I. Sutskever, and G. E. Hinton. Imagenet\nclassification with deep convolutional neural networks. In\nAdvances in neural information processing systems, pages\n1097–1105, 2012. 1, 6\n[20] V. Lebedev, Y. Ganin, M. Rakhuba, I. Oseledets, and\nV. Lempitsky.Speeding-up convolutional neural net-\nworks using fine-tuned cp-decomposition.arXiv preprint\narXiv:1412.6553, 2014. 2\n[21] W. Liu, D. Anguelov, D. Erhan, C. Szegedy, and S. Reed.\nSsd:Single shot multibox detector.arXiv preprint\narXiv:1512.02325, 2015. 7\n[22] M. Rastegari, V. Ordonez, J. Redmon, and A. Farhadi. Xnor-\nnet: Imagenet classification using binary convolutional neu-\nral networks.arXiv preprint arXiv:1603.05279, 2016. 1, 2\n[23] S. Ren, K. He, R. Girshick, and J. Sun. Faster r-cnn: Towards\nreal-time object detection with region proposal networks. In\nAdvances in neural information processing systems, pages\n91–99, 2015. 7\n\n[24] O. Russakovsky, J. Deng, H. Su, J. Krause, S. Satheesh,\nS. Ma, Z. Huang, A. Karpathy, A. Khosla, M. Bernstein,\net al.Imagenet large scale visual recognition challenge.\nInternational Journal of Computer Vision, 115(3):211–252,\n2015. 1\n[25] F. Schroff, D. Kalenichenko, and J. Philbin. Facenet: A uni-\nfied embedding for face recognition and clustering. InPro-\nceedings of the IEEE Conference on Computer Vision and\nPattern Recognition, pages 815–823, 2015. 8\n[26] L. Sifre.Rigid-motion scattering for image classification.\nPhD thesis, Ph. D. thesis, 2014. 1, 3\n[27] K. Simonyan and A. Zisserman. Very deep convolutional\nnetworks for large-scale image recognition.arXiv preprint\narXiv:1409.1556, 2014. 1, 6\n[28] V. Sindhwani, T. Sainath, and S. Kumar. Structured trans-\nforms for small-footprint deep learning.InAdvances in\nNeural Information Processing Systems, pages 3088–3096,\n2015. 1\n[29] C. Szegedy, S. Ioffe, and V. Vanhoucke.Inception-v4,\ninception-resnet and the impact of residual connections on\nlearning.arXiv preprint arXiv:1602.07261, 2016. 1\n[30] C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed,\nD. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich.\nGoing deeper with convolutions. InProceedings of the IEEE\nConference on Computer Vision and Pattern Recognition,\npages 1–9, 2015. 6\n[31] C. Szegedy, V. Vanhoucke, S. Ioffe, J. Shlens, and Z. Wojna.\nRethinking the inception architecture for computer vision.\narXiv preprint arXiv:1512.00567, 2015. 1, 3, 4, 7\n[32] B. Thomee, D. A. Shamma, G. Friedland, B. Elizalde, K. Ni,\nD. Poland, D. Borth, and L.-J. Li. Yfcc100m: The new\ndata in multimedia research.Communications of the ACM,\n59(2):64–73, 2016. 7\n[33] T. Tieleman and G. Hinton. Lecture 6.5-rmsprop: Divide\nthe gradient by a running average of its recent magnitude.\nCOURSERA: Neural Networks for Machine Learning, 4(2),\n2012. 4\n[34] M. Wang, B. Liu, and H. Foroosh. Factorized convolutional\nneural networks.arXiv preprint arXiv:1608.04337, 2016. 1\n[35] T. Weyand, I. Kostrikov, and J. Philbin. PlaNet - Photo Ge-\nolocation with Convolutional Neural Networks. InEuropean\nConference on Computer Vision (ECCV), 2016. 6, 7\n[36] J. Wu, C. Leng, Y. Wang, Q. Hu, and J. Cheng. Quantized\nconvolutional neural networks for mobile devices.arXiv\npreprint arXiv:1512.06473, 2015. 1\n[37] Z. Yang, M. Moczulski, M. Denil, N. de Freitas, A. Smola,\nL. Song, and Z. Wang. Deep fried convnets. InProceedings\nof the IEEE International Conference on Computer Vision,\npages 1476–1483, 2015. 1", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1704.04861v1", + "updated": "2017-04-17T03:57:34Z", + "published": "2017-04-17T03:57:34Z", + "title": "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision\n Applications", + "summary": " We present a class of efficient models called MobileNets for mobile and\nembedded vision applications. MobileNets are based on a streamlined\narchitecture that uses depth-wise separable convolutions to build light weight\ndeep neural networks. We introduce two simple global hyper-parameters that\nefficiently trade off between latency and accuracy. These hyper-parameters\nallow the model builder to choose the right sized model for their application\nbased on the constraints of the problem. We present extensive experiments on\nresource and accuracy tradeoffs and show strong performance compared to other\npopular models on ImageNet classification. We then demonstrate the\neffectiveness of MobileNets across a wide range of applications and use cases\nincluding object detection, finegrain classification, face attributes and large\nscale geo-localization.\n", + "author": [ + { + "name": "Andrew G. Howard" + }, + { + "name": "Menglong Zhu" + }, + { + "name": "Bo Chen" + }, + { + "name": "Dmitry Kalenichenko" + }, + { + "name": "Weijun Wang" + }, + { + "name": "Tobias Weyand" + }, + { + "name": "Marco Andreetto" + }, + { + "name": "Hartwig Adam" + } + ], + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1704.04861v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1704.04861v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "path_onnx loop [jendeley no id].pdf": { + "path": [ + "onnx loop [jendeley no id].pdf" + ], + "title": "onnx loop [jendeley no id].pdf", + "idType": "path", + "tags": [], + "authors": [], + "comments": "", + "text": "\n\n▸ logsoftmax\n▸ logsoftmax_axis\nLoop\nGeneric Looping construct. This loop has multiple termination conditions:\n1. Trip count. Iteration count specified at runtime. Set by specifying the input M.\nOptional. Set to empty string to omit. Note that a static trip count (specified at\ngraph construction time) can be specified by passing in a constant node for\ninput M.\n2. Loop termination condition. This is an input to the op that determines whether to\nrun the first iteration and also a loop-carried dependency for the body graph.\nThe body graph must yield a value for the condition variable, whether this input\nis provided or not.\nThis table summarizes the operating modes of this operator with equivalent C-style\ncode:\n Operator inputs defined as (max_trip_count, condition_var).\n input (\"\", \"\"):\n for (int i=0; ; ++i) {\n cond = ... // Note this value is ignored, but is required in \nthe body\n }\n input (\"\", cond) // Note this is analogous to a while loop\n bool cond = ...;\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (\"\", 1) // Note this is analogous to a do-while loop\n bool cond = true\n for (int i=0; cond; ++i) {\n cond = ...;\n }\n input (trip_count, \"\") // Note this is analogous to a for loop\n int trip_count = ...\n for (int i=0; i < trip_count; ++i) {\n cond = ...; // ignored\n }\n input (trip_count, cond)\n int trip_count = ...;\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n100 / 2452022/03/05 12:21\n\nSample usage - cond as well as trip count\nSample equivalent C code\n bool cond = ...;\n for (int i=0; i < trip_count && cond; ++i) {\n cond = ...;\n }\n graph predict-net {\n %a = Constant[value = ]()\n %b = Constant[value = ]()\n %keepgoing = Constant[value = ]()\n %max_trip_count = Constant[value = ]()\n %keepgoing_out, %b_out, %user_defined_vals = Loop[body = ](%max_trip_count, %keepgoing, %b)\n return\n }\n graph body-net (\n %i[INT32, scalar] // iteration number\n %keepgoing_in[BOOL, scalar] // incoming loop-termination-\ncondition; not used\n %b_in[INT32, scalar] // incoming value of loop-carried-\ndependency b\n ) {\n %my_local = Add(%a, %b_in)\n %b_out = Sub(%a, %b_in) // outgoing value of loop-carried-\ndependency b\n %keepgoing_out = Greater(%my_local, %b_out) // outgoing loop-\ntermination-condition\n %user_defined_val = Add(%b_in, %b_in) // scan-output value to be \naccumulated\n return %keepgoing_out, %b_out, %user_defined_val\n }\n {\n /* User-defined code (enclosing scope) */\n int a = 3, b = 6;\n bool keepgoing = true; // Analogous to input cond\n /* End user-defined code */\n /* Implicitly-defined code */\n const int max_trip_count = 10; // Analogous to input M\n int user_defined_vals[]; // Imagine this is resizable\n /* End implicitly-defined code */\n /* initialize loop-carried variables and scan-output variables */\n bool keepgoing_out = keepgoing\n int b_out = b\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n101 / 2452022/03/05 12:21\n\nThere are several things of note in this code snippet:\n1. Values from the enclosing scope (i.e. variable \"a\" here) are in scope and can be\nreferenced in the inputs of the loop.\n2. Any values computed in the loop body that needs to be used in a subsequent\niteration or after the loop are modelled using a pair of variables in the loop-body,\nconsisting of an input variable (eg., b_in) and an output variable (eg., b_out).\nThese are referred to as loop-carried dependences. The loop operation node\nsupplies the input value of the input variable for the first iteration, and returns the\noutput value of the output variable produced by the final iteration.\n3. Scan_output variables are used to implicitly concatenate values computed\nacross all the iterations. In the above example, the value of user_defined_val\ncomputed over all iterations are concatenated and returned as the value of\nuser_defined_vals after the loop.\n4. Values created in the body cannot be accessed in the enclosing scope, except\nusing the mechanism described above.\n for (int i=0; i < max_trip_count && keepgoing_out; ++i) {\n /* Implicitly-defined code: bind actual parameter values\n to formal parameter variables of loop-body */\n bool keepgoing_in = keepgoing_out;\n bool b_in = b_out;\n /* User-defined code (loop body) */\n int my_local = a + b_in; // Reading value \"a\" from the \nenclosing scope is fine\n b_out = a - b_in;\n keepgoing_out = my_local > b_out;\n user_defined_val = b_in + b_in; // b_in and b_out are different \nvariables\n /* End user-defined code */\n /* Implicitly defined-code */\n user_defined_vals[i] = user_defined_val // accumulate scan-\noutput values\n }\n // int t = my_local; // Can't do this. my_local is not accessible \nhere.\n // The values below are bound to the output variables of the loop \nand therefore accessible\n // b_out; user_defined_vals; keepgoing_out;\n }\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n102 / 2452022/03/05 12:21\n\nNote that the semantics of this op support \"diagonal\" or \"wavefront\" execution. (See\nStep 3 here for an example: https://devblogs.nvidia.com/optimizing-recurrent-neural-\nnetworks-cudnn-5/). Frontends should emit multi-layer RNNs as a series of While\noperators (with time being the inner looping dimension), with each successive layer\nconsuming the scan_outputs from the previous layer, possibly going through several\npoint-wise operators (e.g. dropout, residual connections, linear layer).\nThe input/output of subgraph (produced by loop node) matching is based on order\ninstead of name. The implementation will figure out the names based on this order.\nVersion\nThis version of the operator has been available since version 16 of the default ONNX\noperator set.\nOther versions of this operator: 1, 11, 13\nAttributes\nbody : graph (required)\nThe graph run each iteration. It has 2+N inputs: (iteration_num, condition, loop\ncarried dependencies...). It has 1+N+K outputs: (condition, loop carried\ndependencies..., scan_outputs...). Each scan_output is created by\nconcatenating the value of the specified output value at the end of each iteration\nof the loop. It is an error if the dimensions or data type of these scan_outputs\nchange across loop iterations.\nInputs (2 - ∞)\nM (optional) : I\nA maximum trip-count for the loop specified at runtime. Optional. Pass empty\nstring to skip.\ncond (optional) : B\nA boolean termination condition. Optional. Pass empty string to skip.\nv_initial (variadic, heterogeneous) : V\nThe initial values of any loop-carried dependencies (values that change across\nloop iterations)\nOutputs (1 - ∞)\nv_final_and_scan_outputs (variadic, heterogeneous) : V\nFinal N loop carried dependency values then K scan_outputs. Scan outputs\nmust be Tensors.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n103 / 2452022/03/05 12:21\n\nType Constraints\nV : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8),\ntensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16),\ntensor(float), tensor(double), tensor(string), tensor(bool), tensor(complex64),\ntensor(complex128), seq(tensor(uint8)), seq(tensor(uint16)),\nseq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(int8)), seq(tensor(int16)),\nseq(tensor(int32)), seq(tensor(int64)), seq(tensor(bfloat16)),\nseq(tensor(float16)), seq(tensor(float)), seq(tensor(double)),\nseq(tensor(string)), seq(tensor(bool)), seq(tensor(complex64)),\nseq(tensor(complex128)), optional(seq(tensor(uint8))),\noptional(seq(tensor(uint16))), optional(seq(tensor(uint32))),\noptional(seq(tensor(uint64))), optional(seq(tensor(int8))),\noptional(seq(tensor(int16))), optional(seq(tensor(int32))),\noptional(seq(tensor(int64))), optional(seq(tensor(bfloat16))),\noptional(seq(tensor(float16))), optional(seq(tensor(float))),\noptional(seq(tensor(double))), optional(seq(tensor(string))),\noptional(seq(tensor(bool))), optional(seq(tensor(complex64))),\noptional(seq(tensor(complex128))), optional(tensor(uint8)),\noptional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)),\noptional(tensor(int8)), optional(tensor(int16)), optional(tensor(int32)),\noptional(tensor(int64)), optional(tensor(bfloat16)), optional(tensor(float16)),\noptional(tensor(float)), optional(tensor(double)), optional(tensor(string)),\noptional(tensor(bool)), optional(tensor(complex64)),\noptional(tensor(complex128))\nAll Tensor, Sequence(Tensor), Optional(Tensor), and\nOptional(Sequence(Tensor)) types\nI : tensor(int64)\ntensor of int64, which should be a scalar.\nB : tensor(bool)\ntensor of bool, which should be a scalar.\nExamples\n▸ loop_11\n▸ loop_13\n▸ loop_16_none\nLpNormalization\nGiven a matrix, apply Lp-normalization along the provided axis.\nonnx/Operators.md at main · onnx/onnxhttps://github.com/onnx/onnx/blob/main/docs/Operators...\n104 / 2452022/03/05 12:21" + }, + "doi_10.1006/inco.1996.2613": { + "path": [ + "region-based-memory-management.pdf" + ], + "idType": "doi", + "tags": [], + "comments": "", + "text": "\n\nFile: 643J261301 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2082 . Length: 58 pic 2 pts, 245 mm\nInformation and Computation \u0015 IC2613\ninformation and computation132, 109\u0015176 (1997)\nRegion-Based Memory Management\n1\nMads Tofte\nDepartment of Computer Science,University of Copenhagen,\nUniversitetsparken1,DK2100Copenhagen,Denmark\nand\nJean-Pierre Talpin\nIRISA(Inria-Rennes and CNRS URA227),Campus de Beaulieu,\n35000Rennes Cedex,France\nThis paper describes a memory management discipline for programs\nthat perform dynamic memory allocation and de-allocation. At runtime, all\nvalues are put intoregions. The store consists of a stack of regions. All\npoints of region allocation and de-allocation are inferred automatically,\nusing a type and effect based program analysis. The scheme does not\nassume the presence of a garbage collector. The scheme was first\npresented in 1994 (M. Tofte and J.-P. Talpin,in``Proceedings of the\n21st ACM SIGPLAN\u0015SIGACT Symposium on Principles of Programming\nLanguages,'' pp. 188\u0015201); subsequently, it has been tested in The ML\nKit with Regions, a region-based, garbage-collection free implementation\nof the Standard ML Core language, which includes recursive datatypes,\nhigher-order functions and updatable references L. Birkedal, M. Tofte,\nand M. Vejlstrup, (1996),in``Proceedings of the 23 rd ACM SIGPLAN\u0015\nSIGACT Symposium on Principles of Programming Languages,''\npp. 171\u0015183. This paper defines a region-based dynamic semantics for a\nskeletal programming language extracted from Standard ML. We present\nthe inference system which specifies where regions can be allocated and\nde-allocated and a detailed proof that the system is sound with respect to\na standard semantics. We conclude by giving some advice on how to\nwrite programs that run well on a stack of regions, based on practical\nexperience with the ML Kit.\n]\n1997 Academic Press\nContents\n1.Introduction.\n2.Related work.\narticle no.IC962613\n109\n0890-5401\u001297\u001e25.00\nCopyright\u00171997 by Academic Press\nAll rights of reproduction in any form reserved.\n1\nAn earlier version of this work was presented at the 21st ACM SIGPLAN-SIGACT Symposium on\nPrinciples of Programming Languages, Portland, Oregon, January 1994.\n\nFile: 643J261302 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3429 Signs: 2963 . Length: 52 pic 10 pts, 222 mm\n3.The source language, SExp. 3.1. Notation. 3.2. Static semantics for source. 3.3. Dynamic semantics for\nsource.\n4.The target language, TExp. 4.1. Dynamic semantics for target. 4.2. Example: function values.\n4.3. Example: region polymorphism. 4.4. Design choises. 4.5. Properties of region-based evaluation.\n4.6 Syntactic equality of expressions.\n5.Region inference. 5.1. Semantic objects. 5.2. The inference system. 5.3. Region inference is a refinement\nof Milner's type system. 5.4. Substitution lemma.\n6.Using effects to describe continuations.\n7.Consistency.\n8.Properties of consistency. 8.1. Rule-based co-induction. 8.2. Preservation of consistency. 8.3. Region\nrenaming. 8.4. Region allocation. 8.5. Recursion.\n9.Proof of the correctness of the translation.\n10.Algorithms.\n11.Language extensions. 11.1. References. 11.2. Exceptions. 11.3. Recursive datatypes.\n12.Strengths and weaknesses. 12.1. Small examples. 12.1.1. Polymorphic recursion. 12.1.2. Tail recursion.\n12.1.3. Higher-order functions. 12.2. Larger benchmarks. 12.3. Automatic program transformation.\n12.4. Conclusion.\nAppendix A:Example three-address code\nAppendix B:Nomenclature\n1. INTRODUCTION\nComputers have finite memory. Very often, the total memory allocated by a\nprogram as it is run on a computer far exceeds the size of the computer's memory.\nThus, a practical discipline of programming must provide some form of memory\nrecycling.\nOne of the key achievements of early work in programming languages was the\ninvention of the notion of block structure and the associated implementation\ntechnology of stack-based memory management for recycling of memory. In block-\nstructured languages, every point of allocation is matched by a point of de-alloca-\ntion and these points can easily be identified in the source program (Naur, 1963;\nDijkstra, 1960). Properly used, the stack discipline can result in very efficient use\nof memory, the maximum memory usage being bounded by the depth of the call\nstack rather than the number of memory allocations.\nThe stack discipline has its limitations, however, as witnessed by restrictions in\nthe type systems of block-structured languages. For example, procedures are typi-\ncally prevented from returning lists or procedures as results. There are two main\nreasons for such restrictions.\nFirst, for the stack discipline to work, the size of a value must be known at latest\nwhen space for that value is allocated. This allows, for example, arrays which are\nlocal to a procedure and have their size determined by the arguments of the proce-\ndure; by contrast, it is not in general possible to determine how big a list is going\nto become, when generation of the list begins.\nSecond, for the stack-discipline to work, the life-time of values must comply with\nthe allocation and de-allocation scheme associated with block structure. When\nprocedures are values, there is a danger that a procedure value refers to values\nwhich have been de-allocated. For example, consider the following program:\n110\nTOFTE AND TALPIN\n\nFile: 643J261303 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3887 Signs: 3130 . Length: 52 pic 10 pts, 222 mm\n(letx=(2,3)\nin (fnyO(*1x,y))\nend\n)(5)\nThis expression is an application of a function (denoted by(let}}}end)) to the\nnumber 5. The function has formal parameteryand body(*1x,y), where*1\nstands for first projection. (fnis pronounced*in SML.) Thus the operator expres-\nsion is supposed to evaluate to(fnyO(*1x,y)), wherexis bound to the pair\n(2, 3), so that the whole expression evaluates to the pair (2, 5). However, if we\nregard thelet}}}endconstruct as a block construct (rather than just a lexical\nscope), we see why a stack-based implementation would not work: we cannot de-\nallocate the space forxat theend, since the first component ofxis still needed by\nthe function which is returned by the entireletexpression.\nOne way to ease the limitations of the stack discipline is to allow programmer\ncontrolled allocation and de-allocation of memory, as is done in C. (C has two\noperations,mallocandfree, for allocation and de-allocation, respectively.)\nUnfortunately, it is in general very hard for a programmer to know when a block\nof memory does not contain any live values and may therefore be freed; conse-\nquently, this solution very easily leads to so-calledspace leaks, i.e., to programs that\nuse much more memory than expected.\nFunctional languages (such as Haskell and Standard ML) and some object-\noriented languages (e.g., JAVA) instead let a separate routine in the runtime\nsystem, thegarbage collector, take care of de-allocation of memory [3; 14; 15].\nAllocation is done by the program, often at a very high rate. In our example, the\nthree expressions(2, 3),(fnyO(*1x,y)), and(*1x,y)each allocate\nmemory each time they are evaluated. The part of memory used for holding such\nvalues is called theheap; the ro^ le of the garbage collector is to recycle those parts\nof the heap that hold only dead values, i.e., values which are of no consequence to\nthe rest of the computation.\nGarbage collection can be very fast, provided the computer has enough memory.\nIndeed, there is a much quoted argument that the amortized cost of copying gar-\nbage collection tends to zero as memory tends to infinity [2, p. 206]. It is not the\ncase, however, that languages such as Standard ML free the programmer com-\npletely from having to worry about memory management. To write efficient SML\nprograms, one must understand the potential dangers of, for example, accidental\ncopying or survival of large data structures. If a program is written without concern\nfor space usage, it may well use much more memory than one would like; even if\nthe problem is located (using a space profiler, for example), turning a space-wasting\nprogram into a space-efficient one may require major changes to the code.\nThe purpose of the work reported in this paper is to advocate a compromise\nbetween the two extremes (completely manual vs completely automatic memory\nmanagement). We propose a memory model in which memory can be thought of\nas a stack of regions; see Fig. 1. Each region is like a stack of unbounded size which\ngrows upwards in the picture until the region in its entirety is popped off the region\n111\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261304 . By:XX . Date:20:02:97 . Time:10:28 LOP8M. V8.0. Page 01:01\nCodes: 2641 Signs: 1587 . Length: 52 pic 10 pts, 222 mm\nFIG. 1.The store is a stack of regions; every region is uniquely identified by aregion name\n(e.g.,r\n0\n) and is depicted by a box in the picture.\nstack. For example, a typical use of a region is to hold a list. A program analysis\nautomatically identifies program points where entire regions can be allocated and\nde-allocated and decides, for each value-producing expression, into which region\nthe value should be put.\nMore specifically, we translate every well-typed source language expression,e,\ninto a target language expression,e$, which is identical withe, except for certain\nregion annotations. The evaluation ofe$ corresponds, step for step, to the evalua-\ntion ofe. Two forms of annotation are\ne\n1\nat\\\nletregion\\ine\n2\nend\nThe first form is used whenevere\n1\nis an expression which directly produces a value.\n(Constant expressions,*-abstractions and tuple expressions fall into this category.)\nThe\\is aregion variable; it indicates that the value ofe\n1\nis to be put in the region\nbound to\\.\nThe second form introduces a region variable\\with local scopee\n2\n. At runtime, first\nan unused region, identified by aregion name,r, is allocated and bound to\\. Thene\n2\nis evaluated (probably using the region namedr). Finally, the region is de-allocated.\nTheletregionexpression is the only way of introducing and eliminating regions.\nHence regions are allocated and de-allocated in a stack-like manner.\nThe target program which corresponds to the above source program is\ne$#letregion\\\n4\n,\\\n5\nin letregion\\\n6\nin let x=(2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x,y)at\\\n1\n)at\\\n5\nend\nend\n5at\\\n3\nend\n112\nTOFTE AND TALPIN\n\nFile: 643J261305 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3877 Signs: 3467 . Length: 52 pic 10 pts, 222 mm\nWe shall step through the evaluation of this expression in detail in Section 4.\nBriefly, evaluation starts in a region stack with three regions (\\\n1\n,\\\n2\n, and\\\n3\n);\nevaluation then allocates and de-allocates three more regions (\\\n4\n,\\\n5\n, and\\\n6\n) and\nat the end,\\\n1\n,\\\n2\n, and\\\n3\ncontain the final result.\nThe scheme forms the basis of the ML Kit with Regions, a compiler for the\nStandard ML Core language, including higher-order functions, references and\nrecursive datatypes. The region inference rules we describe in this paper address life\ntimes only. A solution to the other problem, handling values of unknown size, is\naddressed in [5]. An important optimisation turns out to be to distinguish between\nregions, whose size can be determined statically and those that cannot. The former\ncan be allocated on a usual stack.\nUsing C terminology, region analysis infers where to insert calls tomallocand\nfree\u0015\u0015but beware that the analysis has only been developed in the context of\nStandard ML and relies on the fact that SML is rather more strongly typed than\nC. For a strongly typed imperative language like JAVA, region inference might be\nuseful for freeing memory (unlike C, JAVA does not havefree). For readers who\nare interested in code generation, Appendix A shows the three-address program\nwhich the ML Kit produces from the above program, using both region inference\nand the additional optimisations described in [5]. However, this paper is primarily\nabout the semantics of regions, not their implementation.\nExperience with the Kit is that, properly used, the region scheme is strong\nenough to execute demanding benchmarks and to make considerable space savings,\ncompared to a garbage-collected system [5]. We have found that most of the\nallocation is handled well by the automatic region analysis; occasionally it is too\nconservative and here a garbage collector would probably be useful, especially if the\nprogrammer does not know the region inference rules; for now, we have chosen\ninstead to make (usually small) transformations to the source programs to make\nthem more ``region friendly.'' We shall describe some of those transformations\ntowards the end of this paper.\nA very important property of our implementation scheme is that programs are\nexecuted ``as they are written'', with no additional costs of unbounded size (see\nAppendix A for a detailed example). The memory management directives which are\ninserted are each constant time operations. This opens up the possibility of using\nlanguages with the power of Standard ML for applications where guarantees about\ntime and space usage are crucial, for example in real time programming or embedded\nsystems.\nThe key problem which is addressed in this paper is to prove that the region\ninference system is safe, in particular, that de-allocation really is safe, when the\nanalysis claims that it is safe.\nWe do this as follows. We first define a standard operational semantics for our\nskeletal source language, giving both a static and a dynamic semantics (Section 3).\nWe then define a region-based operational semantics for a target language; the\ntarget language is identical to the source language, except that programs have been\nannotated with region information (Section 4). In the dynamic semantics of the\nsource language, there is no notion of store; in the target language semantics,\nhowever, there is a store which is organised as a stack of regions. We then specify\n113\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261306 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3601 Signs: 3242 . Length: 52 pic 10 pts, 222 mm\nthe translation from source language to target language in the form of an inference\nsystem (Section 5). We then define a representation relation between values in a\nstandard semantics for our skeletal language and values in a region-based semantics\n(Section 7) and show that, for every subexpressioneof the original program, as far\nas the rest of the computation (after the evaluation ofe) is concerned,eand its\nimage in the target program evaluate to related values, when evaluated in related\nenvironments (Section 9). Restricting attention to what the rest of the computation\ncan observe turns out to be crucial: some connections between values in the source\nlanguage semantics and in the region-based semantics are lost when memory is re-\nused in the region-based semantics. The key point is that on that part of target\nmachine which can be observed by the rest of the computation, every value used\nin the source language is faithfully represented by a value in the target language.\nThis representation relation is defined as the maximal fixed point of a certain\nmonotonic operator. Properties of the relation are proved using a method of proof\nwhich we callrule-based co-induction(Section 8.1).\nAlgorithms for region inference are beyond the scope of this paper; however, we\nshall give some hints about how the region inference rules we present can be\nimplemented (Section 10).\n2. RELATED WORK\nThe main differences between the region stack and the traditional stack discipline\nfor block-structured languages are as follows. First, when a value is created in our\nscheme, it is not necessarily put into the topmost region. In the case of function\nclosures, for example, the closure is put as far down the stack as is necessary in\norder to be sure that the closure will still exist should it ever be accessed. Second,\nnot all regions have a size which can be determined at the time the region is\nallocated. Finally, the scheme works for higher-order functions and recursive\ndatatypes and allocation is based on the basis of the type system of the language,\nnot the grammar.\nRuggieri and Murtagh [22] propose a stack of regions in conjunction with a\ntraditional heap. Each region is associated with an activation record (this is not\nnecessarily the case in our scheme). They use a combination of interprocedural and\nintraprocedural data-flow analysis to find suitable regions to put values in. We use\na type-inference based analysis, and this is crucial for the handling of polymorphism\nand higher-order functions.\nInoue and Yagi [13] present an interesting technique for compile-time analysis\nof runtime garbage cells in lists. Their method inserts pairs of HOLD and\nRECLAIM'instructions in the target language. HOLD holds on to a pointer,p\nsay, to the root cell of its argument and RECLAIM'collects those cells that are\nreachable frompand fit the path description'. HOLD and RECLAIM pairs are\nnested, so the HOLD pointers can be held in a stack, not entirely unlike our stack\nof regions. In our scheme, however, the unit of collection is one entire region, i.e.,\nthere is no traversal of values in connection with region collection. The path\ndescriptions of Inoue and Yagi make it possible to distinguish between the\n114\nTOFTE AND TALPIN\n\nFile: 643J261307 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3486 Signs: 2644 . Length: 52 pic 10 pts, 222 mm\nindividual members of a list. This is not possible in our scheme, as we treat all the\nelements of the same list as equal. Inoue and Yagi report a 1000reclamation rate\nfor garbagelistcells produced by Quicksort [13, p. 575]. We obtain a 1000\nreclamation rate (but for 1 word) forallgarbage produced by Quicksort, without\ngarbage collection [26].\nHudak [11] describes a reference counting scheme for a first-order call-by-value\nfunctional language. Turneret al. [27] use a type system inspired by linear logic to\ndistinguish between variables which are used at most once and variables which may\nbe used more than once. These analyses provide somewhat different information\nfrom ours: we only distinguish between ``no use'' and ``perhaps some use.''\nGeorgeff [10] describes an implementation scheme for typed lambda expressions\nin so-called simple form together with a transformation of expressions into simple\nform. The transformation can result in an increase in the number of evaluation\nsteps by an arbitrarily large factor [10, p. 618]. Georgeff also presents an\nimplementation scheme which does not involve translation, although this relies on\nnot using call-by-value reduction, when actual parameters are functions.\nThe device we use for grouping values according to regions is unification of\nregion variables, using essentially the idea of Baker (1990), namely that two value-\nproducing expressionse\n1\nande\n2\nshould be given the same ``at\\'' annotation, if and\nonly if type checking, directly or indirectly, unifies the type ofe\n1\nande\n2\n. Baker does\nnot prove safety, however, nor does he deal with polymorphism.\nTo obtain good separation of lifetimes, we useexplicit region polymorphism,by\nwhich we mean that regions can be given as arguments to functions at runtime. For\nexample, a declaration of the successor functionfunsucc(x)=x+1 is compiled\ninto\nfunsucc[\\,\\$](x)=letregion\\\"\nin(x+(1at\\\"))at\\$\nend\nNote thatsucchas been decorated with two extra formal region parameters\n(enclosed in square brackets to distinguish them from value variables such asx).\nThe newsuccfunction has type scheme\n\\\\,\\$.(int,\\)wwwww\u0014\n[get(\\),put(\\$)]\n(int,\\$)\nmeaning that, for any\\and\\$, the function accepts an integer at\\and produces\nan integer at\\$ (performing agetoperation on region\\and aputoperation on\nregion\\$ in the process). Nowsuccwill put its result in different regions, depending\non the context:\n}}}succ[\\\n12\n,\\\n9\n](5 at\\\n12\n)}}}succ[\\\n1\n,\\\n4\n](y)\nWe make the additional provision that a recursive function,f, can call itself with\nregion arguments which are different from its formal region parameters and which\n115\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261308 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3724 Signs: 3055 . Length: 52 pic 10 pts, 222 mm\nmay well be local to the body of the recursive function. Such local regions resemble\nthe activation records of the classical stack discipline.\nWe use ideas from effect inference [12, 16, 17] to find out where to wrap\nletregion\\in . . . end around an expression. Most work on effect inference uses\nthe word ``effect'' with the meaning ``side-effect'' or, in concurrent languages, ``com-\nmunication effect'' [21a]. However, our effects are side-effects relative to the under-\nlying region-based store model, irrespective of whether these effects stem from\nimperative features or not.\nThe idea that effect inference makes it possible to delimit regions of memory and\ndelimit their lifetimes goes back to early work on effect systems. Lucassen and Gif-\nford [16] call iteffect masking; they prove that (side-) effect masking is sound with\nrespect to a store semantics where regions are not reused. Talpin [23] and Talpin\nand Jouvelot [24] present a polymorphic effect system with (side-) effect masking\nand prove that it is sound, with respect to a store semantics where regions are not\nreused.\nThe first version of the proof of the present paper was recorded in a technical\nreport [25], which in turn was used as the basis for the proof outline in [26]. In\norder to simplify the proofs, several modifications to the early proofs have been\nmade. The main differences are: (a) we have adopted the value restriction on poly-\nmorphism, resulting in simpler proofs; in particular, a difficult lemma\u0015\u0015Lemma 4.5\nin [25]\u0015\u0015is not required under the value restriction; (b) the dynamic semantics of\nthe target language has been extended with region environments; (c) the definition\nof consistency has been strengthened to prevent closures with free region variables\n(these used to complicate the proof) (d) the proofs have been rewritten and\nreorganised around the idea of rule-based co-induction.\nAikenet al. [1] have developed a program analysis which can be used as a post-\npass to the analysis described in the present paper. Their analysis makes it possible\nto delay the allocation of regions and to promote the de-allocation, sometimes\nleading to asymptotic improvements in space usage and never leading to worse\nresults than region inference without their analysis added.\n3. THE SOURCE LANGUAGE, SExp\nThe skeletal language treated in this paper is essentially Milner's polymorphically\ntyped lambda calculus [18]. We assume a denumerably infinite set Var of (program)\nvariables. We usexandfto range over variables. Finally,cranges over integer con-\nstants. The grammar for the source language is:\ne::=c|x|*x.e|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf(x)=e\n1\nine\n2\nend\nLet SExp denote the set of source language expressions. The addition of pairs and\ntuples to the theory is straightforward. (References, exceptions, and recursive\ndatatypes have been added in the implementation, but correctness of the translation\nof these constructs has not been proved.) Call-cc, concurrency primitives, and other\nsubstantial extensions of Standard ML have not been studied. Nor is it clear\n116\nTOFTE AND TALPIN\n\nFile: 643J261309 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3623 Signs: 2786 . Length: 52 pic 10 pts, 222 mm\nwhether region inference can be made to bear on lazy functional languages. The fact\nthat ML is typed is essential; the fact that it has polymorphism is not essential for\nwhat follows.\n3.1. Notation\nIn the rest of this paper we shall use the following terminology. Afinitemap is\na map with finite domain. Given setsAandB, the set of finite maps fromAtoB\nis denotedAw\u0014\nfin\nB. The domain and range of a finite mapfare denoted Dom(f)\nand Rng(f), respectively. Whenfandgare finite maps,f+gis the finite map\nwhose domain is Dom(f)_Dom(g) and whose value isg(x), ifx# Dom(g), and\nf(x) otherwise. For any mapfand setA, we writefaAto mean the restriction of\nftoA. We sometimes write a tuple of region variables, for example, in the form\n\\\n1\n}}}\\\nk\n, i.e, without parentheses and commas.\nWe often need to select components of tuples\u0015\u0015for example, the region name of\nan address. In such cases, we rely on variable names to indicate which component\nis being selected. For example, ``rofa'' means ``the region name component ofa''.\n(As we shall see, an address is a pair of the form (r,o), whereris a region name\nandois an offset.)\n3.2. Static Semantics for Source\nFollowing Damas and Milner (1982), we haveML typesandML type schemes\ndefined by\n{\nML\n::=int|:|{\nML\n\u0014{\nML\nML type\n_\nML\n::=\\:\n1\n}}}:\nn\n.{\nML\nML type scheme (n\u001e0),\nwhere:ranges over a denumerably infinite set TyVar oftype variables. An ML type\n{\nML\n0\nisan instanceof an ML type scheme_\nML\n=\\:\n1\n}}}:\nn\n.{\nML\n, written_\nML\n\u001e{\nML\n0\n,\nif there exist{\nML\n1\n, ...,{\nML\nn\nsuch that{\nML\n[{\nML\n1\n\u0012:\n1\n, ...,{\nML\nn\n\u0012:\nn\n]={\nML\n0\n.AnML type\nenvironmentis a finite map from program variables to ML type schemes. We use\nTE\nML\nto range over type environments. Whenois an ML type, type scheme, or\ntype environment, ftv(o) denotes the set of type variables that occur free ino.\nIn Milner's original type discipline, polymorphism is associated withlet. It has\nturned out that there are advantages to restricting polymorphism so that inlet\nx=e\n1\nine\n2\nend,xonly gets a type scheme ife\n1\nis a syntactic value. (In the present\nlanguage, a syntactic value is an integer constant or a lambda abstraction.) This\nrestriction is known as thevalue restriction. Besides making it easier to prove\nsoundness in connection with references and other language extensions, imposing\nthis restriction also makes the proofs of correctness of region inference simpler (we\nhave done both). In fact, we shall take the restriction one step further, and only\nallow polymorphism in connection withletrec. Any program which satisfies the\nvalue restriction can be turned into an equivalent program which only has\nletrec-polymorphism, by simply turning everyletx=e\n1\nine\n2\nendinto\nletrecx$(z)=e\n1\nine\n2\n[x$(0)\u0012x]endwherex$ andzare fresh variables. In the\n117\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261310 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 2876 Signs: 1421 . Length: 52 pic 10 pts, 222 mm\ntheory that follows we therefore only have polymorphism in connection with\nletrec. With this convention,letx=e\n1\nine\n2\nendis just syntactic sugar for\n(*x.e\n2\n)(e\n1\n). We show the rules forleteven so, to make it easier to follow the\nexamples:\nTE\nML\n(x)=_\nML\n_\nML\n\u001e{\nML\nTE\nML\n|&x:{\nML\nTE\nML\n+[x[{\nML\n1\n]|&e:{\nML\n2\nTE\nML\n|&*x.e:{\nML\n1\n\u0014{\nML\n2\nTE\nML\n|&e\n1\n:{\nML\n0\n\u0014{\nML\nTE\nML\n|&e\n2\n:{\nML\n0\nTE\nML\n|&e\n1\ne\n2\n:{\nML\nTE\nML\n|&e\n1\n:{\nML\n1\nTE\nML\n+[x[{\nML\n1\n]|&e\n2\n:{\nML\nTE\nML\n|&letx=e\n1\nine\n2\nend:{\nML\nTE\nML\n+[f[{\nML\n]|&*x.e\n1\n:{\nML\n[:\n1\n, ...,:\nn\n]&ftv(TE\nML\n)=<\nTE\nML\n+[f[\\:\n1\n}}}:\nn\n.{\nML\n]|&e\n2\n:{\nML\n2\nTE\nML\n|&letrecf(x)=e\n1\nine\n2\nend:{\nML\n2\n3.3. Dynamic Semantics for Source\nAnon-recursive closureis a triple(x,e,E), whereEis anenvironment, i.e., a\nfinite map from variables to values. We useEto range over environments; the set\nof environments is denoted Env. Arecursive closuretakes the form(x,e,E,f),\nwherefis the name of the recursive function in question. Avalueis either an integer\nconstant or a closure. We usevto range over values; the set of values is denoted\nVal.\nEvaluation rules appear below. They allow one to infer statements of the form\nE|&e\u0014v, read:in environment E the expression e evaluates to value v. A closure\nrepresenting a recursive function is ``unrolled'' just before it is applied (rule (5)):\nExpressions[E|&e\u0014v].\nE|&c\u0014c(1)\nE(x)=v\nE|&x\u0014v\n(2)\nE|&*x.e\u0014(x,e,E)(3)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n)E|&e\n2\n\u0014v\n2\nE\n0\n+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(4)\nE|&e\n1\n\u0014(x\n0\n,e\n0\n,E\n0\n,f) E|&e\n2\n\u0014v\n2\nE\n0\n+[f[(x\n0\n,e\n0\n,E\n0\n,f)]+[x\n0\n[v\n2\n]|&e\n0\n\u0014v\nE|&e\n1\ne\n2\n\u0014v\n(5)\n118\nTOFTE AND TALPIN\n\nFile: 643J261311 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3488 Signs: 2051 . Length: 52 pic 10 pts, 222 mm\nE|&e\n1\n\u0014v\n1\nE+[x[v\n1\n]|&e\n2\n\u0014v\nE|&letx=e\n1\nine\n2\nend\u0014v\n(6)\nE+[f[(x,e\n1\n,E,f)]|&e\n2\n\u0014v\nE|&letrecf(x)=e\n1\nine\n2\nend\u0014v\n(7)\n4. THE TARGET LANGUAGE, TExp\nWe assume a denumerably infinite set RegVar=[\\\n1\n,\\\n2\n, ...]ofregion variables;\nwe use\\to range over region variables. The grammar for the target language,\nTExp, is\ne::=c|x|f[\\\n1\n, ...,\\\nn\n]at\\|*x.eat\\\n|e\n1\ne\n2\n|letx=e\n1\nine\n2\nend\n|letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\n|letregion\\ineend\nAs is common, functions are represented by closures; but region-polymorphic func-\ntions (introduced byletrecf[ }}} ](x)= } } } ) are represented by so-called region\nfunction closures, which are different from closures. In the expression form*x.eat\n\\, the\\indicates the region into which the closure representing*x.eshould be put.\n(Hence, theat\\qualifies*x.e, note.) In\nletrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\nthe\\indicates where the region function closure forfshould be put. A subsequent\napplicationf[\\$\n1\n, ...,\\$\nn\n]at\\$ extracts this region function closure from the store,\napplies it to actual arguments\\$\n1\n, ...,\\$\nk\n, and creates a function closure in\\$.\nFor any finite set[\\\n1\n, ...,\\\nk\n]of region variables (k\u001e0), we writeletregion\n\\\n1\n, ...,\\\nk\nineendforletregion\\\n1\nin}}}letregion\\\nk\nineend}}}end.\nWe shall not present a separate static semantics for the target language, for such\na semantics can be extracted from the translation rules in Section 5. We thus\nproceed to the dynamic semantics.\n4.1. Dynamic Semantics for Target\nAssume a denumerably infinite set RegName=[r1,r2, ...]ofregion names;we\nuserto range over region names. Region names serve to identify regions at run-\ntime. Further, assume a denumerable infinite set, OffSet, ofoffsets; we useoto\nrange over offsets.\nAregionis a finite map from offsets to storable values. Astorable valueis either\nan integer constant, a function closure, or a region function closure. We usesvto\nrange over storable values; the set of storable values is denoted StoreVal. Avariable\nenvironmentis a finite map from program variables to values. We useVEto range\n119\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261312 . By:CV . Date:20:03:97 . Time:13:01 LOP8M. V8.0. Page 01:01\nCodes: 3926 Signs: 3414 . Length: 52 pic 10 pts, 222 mm\nover variable environments; the set of variable environments is denoted TargetEnv.\nAregion environmentis a finite map from region variables to region names. We use\nRto range over region environments; the set of region environments is denoted\nRegEnv. Afunction closureis a quadruple(x,e$,VE,R), wherexis a program\nvariable,e$ is a target language expression, andVEandRgive meaning to the\nfree program and region variables of*x.e$. Aregion function closureis a tuple\nof the form(\\\n1\n}}}\\\nk\n,x,e,VE,R). Region function closures represent region-\npolymorphic functions; the region variables\\\n1\n, ...,\\\nk\nare required to be distinct and\nare referred to as theformal parametersof the region function closure.\nAnaddressis a pair (r,o) of a region name and an offset. We useato range over\naddresses and Addr to denote the set of addresses. For any addressa, we writer\nof ato mean the first component (i.e., the region name) ofa.Astoreis a finite map\nfrom region names to regions. We usesto range over stores; the set of stores is\ndenoted Store.\nAvalueis an address. We usevto range over values; the set of values is denoted\nTargetVal.\nWe shall be brief about indirect addressing: whenevera=(r,o) is an address, we\nwrites(a) to means(r)(o). Similarly, we writes+[(r,o)[sv]as a shorthand for\ns+[r[(s(r)+[o[sv])]. Moreover, we define theplanar domain of s, written\nPdom(s), to be the finite set[(r,o) # Addr |r# Dom(s)7o# Dom(s(r))]. Finally,\nwe write ``s\"\"[r]'' (read:s without r) to mean the storesa(Dom(s)\"[r]).\nThe inference rules for the dynamic semantics of TExp are shown below. They\nallow one to infer sentences of the forms,VE,R|&e$\u0014v$,s$, read:In store s,\nvariable environment VE,and region environment R,the target expression e$evaluates\nto value v$and(a perhaps modified)store s$.\nRule 10 the evaluation rule for application of a region function closure. A func-\ntion closure is created from the region closure. One can imagine that a runtime-\nerror occurs if the premises cannot be satisfied (for example, because\\$\ni\n\u0012Dom(R),\nfor som\\$\ni\n). However, the correctness proof shows that the premises always can be\nsatisfied for programs that result from the translation.\nRule 14 concerns region-polymorphic and (possibly) recursive functions. For\nreasons explained in Section 5.2, we have chosen to combine the introduction of\nrecursion and region polymorphism in one language construct. Functions defined\nwithletrecneed not be recursive, so one can also use theletrecconstruct to\ndefine region functions that produce non-recursive functions. Rule 14 creates a\nregion closure in the store and handles recursion by creating a cycle in the store:\nfirst a ``fresh address'' is chosen (by side-conditionsr=R(\\),o\u0012Dom(s(r)); the\nenvironmentVE$=VE+[f[(r,o)]is stored in the region function closure\n(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R), which in turn is stored in the fresh address chosen\nearlier. Any reference tofine\n1\nwill then yield the region function closure itself, by\nRule 10, as desired (sinceletrecintroduces recursion). Moreover, in any function\napplication, the operator expression will evaluate to a pointer to an ordinary\nfunction closure(x,e,VE\n0\n,R\n0\n), even if the operator expression is of the\nformf[\\$\n1\n, ...,\\$\nk\n]at\\. Consequently, a single rule for function application\nsuffices.\nFinally, the pushing and popping of the region stack is seen in Rule 15.\n120\nTOFTE AND TALPIN\n\nFile: 643J261313 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2895 Signs: 1367 . Length: 52 pic 10 pts, 222 mm\nExpressions[s,VE,R|&e\u0014v,s$].\nR(\\)=ro\u0012Dom(s(r))\ns,VE,R|&cat\\\u0014(r,o),s+[(r,o)[c]\n(8)\nVE(x)=v\ns,VE|&x\u0014v,s\n(9)\nVE(f)=as(a)=(\\\n1\n, ...,\\\nk\n,x,e,VE\n0\n,R\n0\n)\nr=R(p)o\u0012Dom(s(r))sv=(x,e,VE\n0\n,R\n0\n+[\\\ni\n[R(\\$\ni\n); 1\u001di\u001dk])\ns,VE,R|&f[\\$\n1\n, ...,\\$\nk\n]at\\\u0014(r,o),s+[(r,o)[sv]\n(10)\nr=R(\\)o\u0012Dom(s(r))\ns,VE,R|&*x.eat\\\u0014(r,o),s+[(r,o)[(x,e,VE,R) ]\n(11)\ns,VE,R|&e\n1\n\u0014a\n1\n,s\n1\ns\n1\n(a\n1\n)=(x\n0\n,e\n0\n,VE\n0\n,R\n0\n)\ns\n1\n,VE,R|&e\n2\n\u0014v\n2\n,s\n2\ns\n2\n,VE\n0\n+[x\n0\n[v\n2\n],R\n0\n|&e\n0\n\u0014v,s$\ns,VE,R|&e\n1\ne\n2\n\u0014v,s$\n(12)\ns,VE,R|&e\n1\n\u0014v\n1\n,s\n1\ns\n1\n,VE+[x[v\n1\n],R|&e\n2\n\u0014v,s$\ns,VE,R|&letx=e\n1\nine\n2\nend\u0014v,s$\n(13)\nr=R(\\)o\u0012Dom(s(r))VE$=VE+[f[(r,o)]\ns+[(r,o)[(\\\n1\n, ...,\\\nk\n,x,e\n1\n,VE$,R)],VE$,R|&e\n2\n\u0014v,s$\ns,VE,R|&letrecf[\\\n1\n, ...,\\\nk\n](x)at\\=e\n1\nine\n2\nend\u0014v,s$\n(14)\nr\u0012Dom(s)s+[r[[]],VE,R+[\\[r]|&e\u0014v,s\n1\ns,VE,R|&letregion\\ineend\u0014v,s\n1\n\"\"[r]\n(15)\nWe now illustrate the use of the rules by two examples, comment on the design deci-\nsions embodied in the rules and finally prove some properties about the semantics.\n4.2. Example: Function Values\nLet us consider the evaluation of the expressione$ from Section 1. Since\\\n1\n,\\\n2\n,\nand\\\n3\noccur free ine$, they must be allocated before the evaluation ofe$ begins.\nWe show three snapshots from the evaluation ofe$, namely (a) just after the closure\nhas been allocated, (b) just before the closure is applied, and (c) at the end; we\nassume six regions with namesr\n1\n, ...,r\n6\n, which become bound to\\\n1\n, ...,\\\n6\n, respec-\ntively. Notice the dangling, but harmless, pointer at (b):\n121REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261314 . By:XX . Date:20:02:97 . Time:10:29 LOP8M. V8.0. Page 01:01\nCodes: 2292 Signs: 1335 . Length: 52 pic 10 pts, 222 mm\n4.3. Example: Region Polymorphism\nThis example illustrates region polymorphism and the use of polymorphic recur-\nsion. Consider the following source expression, which computes the 15th Fibonacci\nnumber:\nletrec fib(x)=ifx=0 then 1\nelse ifx=1 then 1\nelse fib(x&2)+fib(x&1)\nin fib(15) end\nThe corresponding target expression is shown in Fig. 2. In the target expression,\nthefibfunction takes two arguments, namely\\\n3\n, which is the region wherexis\nlocated, and\\\n4\n, which is the place wherefibis supposed to put its result. Due to\nthe presense of polymorphic recursion in the region inference system, the recursive\ncalls offibuse regionsdifferentfrom\\\n3\nand\\\n4\n(and the two recursive calls use\nseparate regions). For example, the first call first reserves space for the result of the\ncall (\\\n5\n), then reserves space for the actual argument (\\\n8\n), then creates the actual\nargument, performs the call, de-allocates the actual argument, and uses the result,\ntill it can be discarded (after the +).\nTheletrecstores the following cyclic region function closure in the store at\nsome new address,a:\n(\\\n3\n\\\n4\n,x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n])\nAssuming that\\\n13\nis bound tor\n3\n, the application offibto 15 near the end of the\nprogram stores the following function closure in the region denoted by\\\n12\n:\n(x,if...,[fib[a],[\\\n1\n[r\n1\n,\\\n2\n[r\n2\n,\\\n3\n[r\n3\n,\\\n4\n[r\n1\n])\n122\nTOFTE AND TALPIN\n\nFile: 643J261315 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2129 Signs: 1556 . Length: 52 pic 10 pts, 222 mm\nFIG. 2.The Fibonacci function annotated with regions. The result will be a single integer in\\\n1\n.\nWe see that region inference has produced allocations and de-allocations very\nsimilar to those of a traditional stack-based implementation. Indeed, the maximal\nmemory usage in this example is proportional to the maximum depth of the recur-\nsion, as it would be in a pure stack discipline.\n4.4. Design Choices\nThe region-based semantics relies on a number of design choices, some of which\nare crucial.\nFirst, it is crucial that the sets RegName and OffSet can be any (denumerable)\nsets. We do not assume that these sets are ordered or that there is any notion of\naddress locality. Thus no particular physical implementation of the region stack is\nbuilt into the theory. This is essential since real computers have a flat address space,\nwhereas the region stack conceptually is two-dimensional. The particular implemen-\ntation choice used in the ML Kit is described in [5].\nSecond, it is crucial that the semantics uses so-called ``flat environments''; the\nalternative (``linked environments'') is to represent the environment as a linked list\nof environment frames. This is a popular representation in block-structured\nlanguages and in some functional languages. With linked environments, closure\ncreation is cheap, but it does not work with regions, at least if the environment\nframes are interspersed with regions on one stack! In Example 4.2, it is essential\nthat we copy the environment into the closure for*y.(*1x,y)at\\\n1\nso that\n123\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261316 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2855 . Length: 52 pic 10 pts, 222 mm\nthe binding forxis not destroyed when we leave the scope ofxand\\\n6\nand hence\npop the stack.\nThere are also some inessential choices. There is no need to represent all objects\nboxed (in the ML Kit, integers and other values that fit in one machine word are\nrepresented unboxed). Recursion could probably have been implemented using\nunfolding of closures rather than cycles in the store. Finally, there is no deep need\nto keep the region environment and the variable environment separate in closures\n(the ML Kit merges the two) but we do so to make it clear that region names are\nnot values.\n4.5. Properties of Region-Based Evaluation\nWe can now state formally that the complete evaluation of an expression does\nnot decrease the store. For arbitrary finite mapsf\n1\nandf\n2\n, we say thatf\n2\nextends\nf\n1\n, writtenf\n1\n\u001ff\n2\n, if Dom(f\n1\n)\u001fDom(f\n2\n) and for allx# Dom(f\n1\n),f\n1\n(x)=f\n2\n(x). We\nthen say thats\n2\nsucceeds s\n1\n, writtens\n2\nc\n=\ns\n1\n(ors\n1\nC\n=\ns\n2\n), if Dom(s\n1\n) \u001fDom(s\n2\n) and\ns\n1\n(r)\u001fs\n2\n(r), for allr# Dom(s\n1\n).\nLemma4.1.If s,VE,R|&e\u0014v,s$thenDom(s) =Dom(s$ ) andsC\n=\ns$.\nThe proof is a straightforward induction on the depth of inference ofs,VE,\nRE|&e\u0014v,s$. The formula Dom(s)=Dom(s$) in Lemma 4.1 expresses that the\nstore resulting from the elaboration has neither more nor fewer regions than the\nstore in which the evaluation begins, although other regions may have been\nallocated temporarily during the evaluation. The evaluation ofemay write values\nin existing regions, so it is possible to haves(r)/s$(r), for somer. However,enever\nremoves or overwrites any of the values that are ins.\n4.6. Syntactic Equality of Expressions\nLete$ be a target expression. The set of program variables that occur free ine$\nis written fpv(e$ ). The set of region variables that occur free ine$ is frv(e$).\nBoth in the source language and in the target language, we shall consider two\nexpressions equal, if they can be obtained from each other by renaming of bound\nvariables. This extends to closures. For example,(x\n1\n,e\n1\n,VE\n1\n)and(x\n2\n,e\n2\n,VE\n2\n)\nare considered equal ifVE\n1\n=VE\n2\nand*x\n1\n.e\n1\nand*x\n2\n.e\n2\nare equal in the above\nsense. Moreover, we even allow that the free variables of*x\n2\n.e\n2\nmay be a renaming\nof the free variables of*x\n1\n.e\n1\n, provided of course that the corresponding change\nhas been made in the domain ofVE\n1\nto obtainVE\n2\n. (Loosely speaking, this\ncorresponds to admitting value environments as declarations and then allowing the\nusual renamings permitted in an expression of the formletVE\n1\nin*x\n1\n.e\n1\nend.)\nFinally, we consider(x,e,VE\n1\n)and(x,e,VE\n2\n)equal, ifVE\n1\nafpv(*x.e)=\nVE\n2\nafpv(*x.e). This allows us to introduce and delete unused program variables\nin the domains of environments inside closures.\nSimilarly, for any region closure(\\\u0011,x,e,VE,R)we allow the renamings of\n\\\u0011,x, fpv(e) and frv(e) and the introduction or elimination of unused program\n124\nTOFTE AND TALPIN\n\nFile: 643J261317 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2899 Signs: 1852 . Length: 52 pic 10 pts, 222 mm\nvariables that one would expect if the closure were written letVE,Rin*\\\u0011,x\n1\n.e\n1\nend.\nEquality on semantic objects in each of the two dynamic semantics is then\ndefined to be the smallest equivalence relation which is closed under the three trans-\nformations described above.\n5. REGION INFERENCE\nThe rules that specify which translations are legal are called theregion inference\nrules. In Section 5.1 we present region types and other semantic objects that occur\nin the region inference rules; the rules themselves are presented in Section 5.2. In\nSections 5.3 and 5.4 we state and prove properties of the region inference system;\nfor example, that the translation is a refinement of Milner's type discipline.\n5.1. Semantic Objects\nRegion Types. We assume three denumerably infinite, pairwise disjoint sets:\n:# TyVartype variables\n\\orp# RegVarregion variables\n=# EffectVareffect variables\nTo avoid too many subscripts and primes, we use bothp(for ``place'') and\\to\nrange over region variables. Anatomic effectis a term of the form\n'::=put(\\)|get(\\)|=atomic effect\nWe use'to range over atomic effects. Aneffectis a finite set of atomic effects. We\nuse.to range over effects. For a concrete example, the effect of expressione$in\nExample 4.2 is[put(\\\n1\n),put(\\\n2\n),put(\\\n3\n)].\nTypes and types with places are given by\n{::=int|:|+w\u0014\n=..\n+type\n+::=({,\\)type with place\nIn a function type\n+w\u0014\n=..\n+$(16)\nthe object=..is called anarrow effect. Formally, an arrow effect is a pair of an\neffect variable and an effect; we refer to=and.as thehandleand thelatent effect,\nrespectively. If a functionfhas type (16) then the latent effect.is to be interpreted\nas the effect of evaluating the body off. Effect variables are useful for expressing\ndependencies between effects. For example, the target expression\ne$#(*f.(*x.f(x))at\\\n4\n)at\\\n5\n125REGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261318 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3490 Signs: 2507 . Length: 52 pic 10 pts, 222 mm\ncan be given type\n{\ne$\n=\n_\n((:\n1\n,\\\n1\n)ww\u0014\n=\n1\n.<\n(:\n2\n,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(17)\n((:\n1\n,\\\n1\n)wwwww\u0014\n=\n3\n.[get(\\\n3\n),=\n1\n]\n(:\n2\n,\\\n2\n),\\\n4\n)\nIn (17) the last occurrence of=\n1\nindicates that for alle\n1\nande\n2\nof the appropriate\ntype, ife\n1\nevaluates to some function,g, ande\n2\nevaluates to some value,v, then\nthe evaluation of (e$e\n1\n)e\n2\nmay involve an application ofg. (As it happens, the\nevaluation would indeed involve an application ofg, but the type does not\nexpress that.)\nEquality of types is defined by term equality, as usual, but up to set equality of\nlatent effects. For example, the arrow effects=.[put(\\),get(\\$)]and=.[get(\\$),\nput(\\)]are considered equal.\nOne might wonder why we have a pair=..on the function arrow rather than\njust, say, an effect.. The reason is that the region inference algorithms we use rely\non unification, just as ML type inference does [7]. Thus the effect sets on function\narrows pose a problem for the existence of principal unifiers. A solution is to use\narrow effects together with certain invariants about the use of effect variables. The\nbasic idea is that effect variables uniquely ``stand for'' effects: if=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree formed by the inference algorithm and=\n1\n==\n2\nthen it will\nalso be the case that.\n1\n=.\n2\n. Moreover, if two arrow effects=\n1\n..\n1\nand=\n2\n..\n2\nboth\noccur in a proof tree and=\n2\n#.\n1\nthen.\n2\n\u001f.\n1\n: the presence of=\n2\nin.\n1\nimplies\nthat.\n2\nsubsumes the entire effect.\n1\nwhich=\n1\nstands for. With these repre-\nsentation invariants and using the special notion of substitution defined below,\none can prove the existence of principal unifiers, even though types ``contain''\neffects (which are sets). A detailed account of how this is done is beyond\nthe scope of this paper. Also, the invariants mentioned above are not needed for\nproving the soundness of region inference, so we shall not consider them in what\nfollows.\nSubstitution.Atype substitutionis a map from type variables to types; we use\nS\nt\nto range over type substitutions. Aregion substitutionis a map from region\nvariables to region variables; we useS\nr\nto range over region substitutions. Aneffect\nsubstitutionis a map from effect variables to arrow effects; we useS\ne\nto range over\neffect substitutions. Asubstitutionis a triple (S\nt\n,S\nr\n,S\ne\n); we useSto range over\nsubstitutions. Substitution on types, region variables, and effects is defined as\nfollows. LetS=(S\nt\n,S\nr\n,S\ne\n); then\nEffects.\nS(.)=[put(S\nr\n(\\)) |put(\\)#.]\n_[get(S\nr\n(\\)) |get(\\)#.]\n_['|_=,=$,.$.=#.7=$..$=S\ne\n(=)7'#[=$]_.$].\n126\nTOFTE AND TALPIN\n\nFile: 643J261319 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3541 Signs: 1727 . Length: 52 pic 10 pts, 222 mm\nTypes and Region Variables.\nS(int)=intS(:)=S\nt\n(:)S(\\)=S\nr\n(\\)\nS({,\\)=(S({),S(\\))\nS(+w\u0014\n=..\n+$)=S(+)wwwww\u0014\n=$.(.$_S(.))\nS(+$ ),where=$..$=S\ne\n(=).\nFor a concrete example, consider the substitutionS=(S\nr\n,S\nt\n,S\ne\n), where\nS\ne\n(=)=\n{\n=\n8\n.[get(\\\n1\n),put(\\\n2\n)]\n=\nif===\n1\n;\notherwise\nS\nt\n(:)=\n{\nint\n:\nif:=:\n1\nor:=:\n2\n;\notherwise\nS\nr\n(\\)=\\for all\\\nwhere=\n1\n,\\\n1\n,\\\n2\n,:\n1\nand:\n2\nrefer to (17). Now we have\nS({\ne$\n)=\n_\n((int,\\\n1\n)wwwwww\u0014\n=\ng\n.[get(\\\n1\n),put(\\\n2\n)]\n(int,\\\n2\n),\\\n3\n)wwww\u0014\n=\n2\n.[put(\\\n4\n)]\n(18)\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n)\nThis more specific type fore$ is appropriate ife$ occurs in the application expression:\ne$((*n:(int,\\\n1\n).(n+1)at\\\n2\n)at\\\n3\n)(19)\nfor which one will then be able to infer the type and place\n((int,\\\n1\n)wwwwwwwwww\u0014\n=\n3\n.[get(\\\n1\n),get(\\\n3\n),put(\\\n2\n),=\n8\n]\n(int,\\\n2\n),\\\n4\n).\nIn applying substitutions to semantic objects with bound names (e.g., a type\nscheme) bound variables are first renamed to avoid capture, when necessary.\nSubstitutions compose; Id is the identity substitution.\nThesupportof a type substitutionS\nt\n, written Supp(S\nt\n), is the set[:# TyVar |\nS\nt\n(:){:]. Similarly for region substitutions. Thesupportof an effect substitution\nS\ne\n, written Supp(S\ne\n), is the set[=# EffectVar |S\ne\n(=){=.<]. The support of a sub-\nstitutionS=(S\nt\n,S\nr\n,S\ne\n), written Supp(S), is defined as Supp(S\nt\n)_Supp(S\nr\n)_\nSupp(S\ne\n). WheneverS\nt\n,S\nr\n, andS\ne\nare finite maps of the appropriate types we take\nthe liberty of considering the triple (S\nt\n,S\nr\n,S\ne\n) a substitution, without explicitly\nextending the finite maps to total maps.\nType Schemes. Type schemes resemble the type schemes of Damas and Milner\n[7] but with additional quantification over region variables and effect variables,\n_::=\\().{simple type scheme\n|\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\ncompound type scheme,\n127\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261320 . By:XX . Date:20:02:97 . Time:10:30 LOP8M. V8.0. Page 01:01\nCodes: 2548 Signs: 1879 . Length: 52 pic 10 pts, 222 mm\nwheren\u001e0,k\u001e0 andm\u001e0. The following definitions are stated for compound\ntype schemes but are easily extended to simple type schemes. For a type scheme\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\n, thebound variables of _, written bv(_), are the set\n[\\\n1\n, ...,\\\nk\n,:\n1\n, ...,:\nn\n,=\n1\n, ...,=\nm\n].\nWe sometimes write the sequences of bound variables as vectors::\u0011,\\\u0011, and=\u0011, respec-\ntively. Two type schemes areequivalentif they can be obtained from each other by\nrenaming and reordering of bound variables. A type{$isaninstance of _, written\n_\u001e{$, if there exists a substitutionSsuch that Supp(S) \u001fbv(_) andS({)={$.\nWhen we want to makeSexplicit, we say that{$ is an instance of_ via S, written\n_\u001e{$via S. Equivalent type schemes have the same instances.\nWe sometimes write{as a shorthand for the simple type scheme\\().{, not to\nbe confused with the compound type scheme\\().{\n\u0014\n, since compound type schemes\nhave a special significance: they are used exclusively as types of region-polymorphic\nfunctions, even for those region-polymorphic functions that take an empty list of\nactual region parameters. The underlining serves to make it clear whether a type\nscheme is to be regarded as simple or compound.\nAtype environmentis a finite map from program variables to pairs of the form\n(_,\\). We useTEto range over type environments.\nThe semantic objects are summarised in Fig 3. The notion of free variables extend\nto larger semantic objects, such as type environments. (For example, a type variable\nis said to occur free inTEif it occurs free inTE(x), for somex.) For any semantic\nobjectA, frv(A) denotes the set of region variables that occur free inA; ftv(A)\ndenotes the set of type variables that occur free inA; fev(A) denotes the set of effect\nvariables that occur free inA; and fv(A) denotes the union of the above.\nFIG. 3. Semantic objects of region inference.\n128TOFTE AND TALPIN\n\nFile: 643J261321 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3454 Signs: 1626 . Length: 52 pic 10 pts, 222 mm\n5.2. The Inference System\nThe inference rules allow the inference of statements of the form\nTE|&eOe$:+,.\nread:in TE,e translates to e$,which has type and place + and effect .. The region\ninference rules are non-deterministic: givenTEande, there may be infinitely many\ne$,+, and.satisfyingTE|&eOe$:+,.. This non-determinism is convenient to\nexpress type-polymorphism, but we also use it to express freedom in the choice of\nregion variables. Indeed, the region inference rules allow one to put all values in a\nsingle region, although, in practice, this would be the worst possible choice.\nRegion-based Translation of Expressions[TE|&e\u0014e$:+,.]\nTE|&cOcat\\:(int,\\),[put(\\)](20)\nTE(x)=({,\\)\nTE|&xOx:({,\\),<\n(21)\nTE(f)=(_,\\$)_=\\\\\n1\n}}}\\\nk\n:\u0011=\u0011.{\n1\n_\u001e{viaS.=[get(\\$),put(\\)]\nTE|&fOf[S(\\\n1\n), ...,S(\\\nk\n)]at\\:({,\\),.\n(22)\nTE+[x[+\n1\n]|&eOe$:+\n2\n,.\n.\u001f.${=+\n1\nw\u0014\n=..$\n+\n2\nfrv(e$ ) \u001ffrv(TE,{)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(23)\nTE|&e\n1\nOe$\n1\n:(+$w\u0014\n=..\n+,\\),.\n1\nTE|&e\n2\nOe$\n2\n:+$,.\n2\nTE|&e\n1\ne\n2\nOe$\n1\ne$\n2\n:+,._.\n1\n_.\n2\n_[=,get(\\)]\n(24)\nTE|&e\n1\nOe$\n1\n:({\n1\n,\\\n1\n),.\n1\nTE+[x[({\n1\n,\\\n1\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letx=e\n1\nine\n2\nendOletx=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(25)\nTE+[f[(\\\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&*x.e\n1\nO*x.e$\n1\nat\\\n0\n:({,\\\n0\n),.\n1\nfv(:\u0011,\\\u0011,=\u0011)&fv(TE,.\n1\n)=<\nTE+[f[(\\:\u0011\\\u0011=\u0011.{\n\u0014\n,\\\n0\n)]|&e\n2\n\u0014e$\n2\n:+,.\n2\nTE|&letrecf(x)=e\n1\nine\n2\nendO\nletrecf[\\\u0011](x)at\\\n0\n=e$\n1\nine$\n2\nend:+,.\n1\n_.\n2\n(26)\nTE|&eOe$:+,.\\\u0012frv(TE,+)\nTE|&eOletregion\\ine$end:+,.\"[put(\\),get(\\)]\n(27)\nTE|&eOe$:+,.=\u0012fev(TE,+)\nTE|&eOe$:+,.\"[=]\n(28)\nIn Rule 21, note that the effect of referring toxis empty; this is because the\neffects only relate to access of the region stores, not the environmentsVEandR.\nIn Rule 22 the instances of the bound region variables become actual region\n129\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261322 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3655 Signs: 2838 . Length: 52 pic 10 pts, 222 mm\nparameters in the target expression. The resulting effect includesget(\\$ ) andput(\\),\nfor we access the region closure in\\$ and create an ordinary function closure in\\.\nIn Rule 23, the effect of creating the function closure at region\\is simply\n[put(\\)]. Following Talpin and Jouvelot [24], one is allowed to make the infor-\nmation about the function less precise by increasing the latent effect. This is useful\nin cases where two expressions must have the same functional type (including the\nlatent effects on the arrows) but may evaluate to different closures. The freedom to\nincrease effects is also useful when one wants to prove that every well-typed Exp-\nprogram of Milner [18] can be translated with the region inference rules\u0015\u0015see\nLemma 5.2 below. We shall explain the side-condition frv(e$)\u001ffrv(TE,{)ina\nmoment.\nIn Rule 24 we see that the latent effect is brought out when the function is\napplied. Theget(\\) in the resulting effect is due to the fact that we must access the\nclosure at\\in order to perform the function application.\nIn Rule 25 notice that the type scheme ofxhas no bound variables of any kind.\nThe absence of bound type variables is due to the value restriction (see Section 3.2).\nThe absence of bound region variables is due to the fact that introducing bound\nregion variables (and hence delaying the evaluation ofe$\n1\n) may change the seman-\ntics of the program ife$\n1\nis not a value. (Whene$\n1\nis a value, one can rewrite thelet\nto aletrecand use Rule 26 to obtain region polymorphism.) Finally, one could\nallow quantification of effect variables in Rule 25, as indeed we did in [25], but\neffect quantification in simple type schemes appears to be of limited practical use\nand it complicates the proof of Lemma 8.3 below considerably [25], so we have\nabandoned it.\nIn Rule 26, note thatfis region-polymorphic, but not type-polymorphic, inside\ne\n1\n, its own body. Ine\n2\n, however,fis polymorphic in types, regions and effects.\nWithout the limitation on type-polymorphism insidee\n1\n, region inference would not\nbe decidable.\nRule 27 concerns the introduction ofletregionexpressions. The basic idea,\nwhich goes back to early work on effect systems [17], is this. Suppose\nTE|&eOe$:+,.and assume that\\is a region variable which does not occur free\ninTEor in+(typically,\\occurs free in., indicating that\\is used in the computa-\ntion ofe$).Then \\ is purely local to the evaluation of e$,in the sense that the rest\nof the computation will not access any value stored in \\.\nExample. Once again, consider the expressione$ from Section 1. Lete$\n0\nbe the\nsubexpression\ne$\n0\n#let x = (2 at\\\n2\n,3at\\\n6\n)at\\\n4\nin (*y.(*1x ,y)at\\\n1\n)at\\\n5\nend\nThe type environment in force when this expression is produced isTE\n0\n=[]; the\ntype and place ofe$\n0\nis\n+\n0\n=((int,\\\n3\n)wwwwwww\u0014\n=\n1\n.[get(\\\n3\n),put(\\\n1\n)]\n((int,\\\n2\n)V(int,\\\n3\n),\\\n1\n),\\\n5\n);\n130\nTOFTE AND TALPIN\n\nFile: 643J261323 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3741 Signs: 2780 . Length: 52 pic 10 pts, 222 mm\nand the effect ofe$\n0\nis.\n0\n=[put(\\\n2\n),put(\\\n6\n),put(\\\n4\n),put(\\\n5\n)]. Note that\\\n6\nis the\nonly region variable which occurs free in.\n0\nbut occurs free neither inTE\n0\nnor in\n+\n0\n. Rule 27 allows us to discharge\\\n6\n, resulting in the effect[put(\\\n2\n),put(\\\n4\n),\nput(\\\n5\n)]and the ``letregion\\\n6\nin...end'' ine$.\nNext, Rule 28 allows one to discharge an effect variable from the effect of an\nexpression; noletregionis introduced, since the discharge does not influence\nevaluation.\nWe owe the reader an explanation for the side-condition frv(e$)\u001ffrv(TE,{)in\nRule 23. It is often the case that every region variable which occurs free in a trans-\nlated expression occurs free either in the type or in the effect of the expression.\nHowever, here is an example where this does not hold,\n[]|&(*f.1)(*x.2)O((*f.1at\\\n1\n)at\\\n2\n)((*x.2at\\\n3\n)at\\\n4\n):(int,\\\n1\n),.\nwhere.=[put(\\\n2\n),put(\\\n4\n),get(\\\n2\n),put(\\\n1\n)]. Here we see that\\\n3\nis free in the\ntarget expression but occurs free neither in the effect nor in the resulting type and\nplace. The reason is that 2at\\\n3\nwill never be evaluated (i.e., it is ``dead code''). The\npurpose of the side-condition on Rule 23 is to prevent the body of the function from\ncontaining free region variables which only occur in dead code. Such region\nvariables complicate arguments about renaming of region variables, specifically\nthey complicate the proof of Lemma 8.3, if allowed. We therefore impose the side-\ncondition on Rule 23. Note, however, that one can always satisfy this side-condition\nby repeatedly applying Rule 27 to the function body, just before applying Rule 23,\nfor in Rule 27 there is no requirement that\\must occur free in..\nAs mentioned earlier, the region inference rules give rise to a static semantics\nfor the target language: one just consistency replaces sentences of the form\nTE|&eOe$:+,.byTE|&e$:+,.. However, we prefer the present formulation,\nwhich emphasises that the rules specify a translation.\n5.3. Region Inference Is a Refinement of Milner's Type System\nIn this section we prove that the region inference system is a refinement of\nMilner's type discipline [18] in the sense that an expression can be translated with\nthe region rules if and only if it is well typed according to Milner's type discipline,\nas defined in Section 3.2. In particular, this shows that the problem of determining\nwhether a closed expression can be region-annotated is decidable.\nWe first show that an expression can be translated only if it is well typed. To this\nend, we define a function,?, (for ``projection'') from semantic objects in the region\nrules to the semantic objects in the Milner rules:\n?(:)=:;?(int)=int;?(+w\u0014\n=..\n+$)=?(+)\u0014?(+$)\n?({,\\)=?({);?(\\\\\u0011:\u0011=\u0011.{)=\\:\u0011.?({);?(_,\\)=?(_);?(TE)=?bTE.\nLemma5.1.If TE|&eOe$:+,. then ?(TE)|&e:?(+).\n131\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261324 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3850 Signs: 2390 . Length: 52 pic 10 pts, 222 mm\nThe proof is a straightforward induction on the depth ofTE|&eOe$:+,..\nNext we show that every well-typed term can be translated. To this end we define\na relation,R, between Milner's objects and ours. Let\\\n0\nbe some fixed region variable\nand let=\n0\nbe some fixed effect variable. The basic idea is to choose\\\n0\neverywhere\nwe need a region variable in the translation and to choose=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\neverywhere we need an arrow effect in the translation. Unfortunately, we cannot\nsimply makeRa map, because of the distinction between simple and compound\ntype schemes. So we defineRinductively as follows:\n:R:intRint\n{R+ {$R+$\n({\u0014{$)R(+wwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+$)\n{R{$\n\\().{R\\().{$\n{R{$\n\\:\u0011.{R\\:\u0011.{$\n{R{$\n{R({$,\\\n0\n)\n_R_$\n_R(_$,\\\n0\n)\nDom(TE)=Dom(TE$)\\x# Dom(TE).TE(x)RTE$(x)\nTE R TE$\nClearly, for everyTEthere exists aTE$ such thatTE R TE$.\nLemma5.2.If TE|&e:{ and TE R TE$then TE$|&eOe$:+,. for some e$,+ and\n. which satisfy { R +, frv(+)=[\\\n0\n], frv(e$)\u001f[\\\n0\n] and .\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n].\nProof.By induction on the depth of inference ofTE|&e:{. We show only two\ncases, as the rest are straightforward.\n[e#x].By assumption we haveTE(x)=_and_\u001e{. SinceTE R TE$we\nthen haveTE$(x)=(_$,\\\n0\n) for some_$ which satisfies_R_$. Now_$ may be\nsimple or compound, but if it is compound it has no quantified region variables. Let\n+=({$,\\\n0\n) be the unique type with place satisfying{R+. Then_$\u001e{$ and the\ndesired conclusion follows either by Rule 21 or by Rule 22.\n[e#*x.e\n1\n]. Here{={\n1\n\u0014{\n2\nfor some{\n1\nand{\n2\nandTE|&*x.e\n1\n:{must have\nbeen inferred from the premiseTE+[x[{\n1\n]|&e\n1\n:{\n2\n. We have (TE+[x[{\n1\n])\nR(TE$+[x[+\n1\n]), where+\n1\nis the unique type with place related to{\n1\n. By induction\nthereexiste$\n1\n,+\n2\nand.\n0\nsuchthatTE$+[x[+\n1\n]|&e\n1\nOe$\n1\n:+\n2\n,.\n0\n,\nfrv(+\n2\n)=[\\\n0\n], frv(e$\n1\n)\u001f[\\\n0\n]and.\n0\n\u001f[get(\\\n0\n),put(\\\n0\n),=\n0\n]. Now Rule 23 con-\nveniently allows us to use this inclusion to proveTE$|&*x.e\n1\nO*x.e$\n1\nat\n\\\n0\n:(+\n1\nwwwwwww\u0014\n=\n0\n.[get(\\\n0\n),put(\\\n0\n),=\n0\n]\n+\n2\n,\\\n0\n),[put(\\\n0\n)]fromwhichthedesiredresults\nfollows.K\n5.4. Substitution Lemma\nLemma5.3.For all substitutions S,if TE|&eOe$:+,. then S(TE)|&eO\nS(e$):S(+),S(.).\nThe proof is a straightforward induction on the depth of the inference of\nTE|&eOe$:+,., using appropriate variants ofSin the case forletrec.\nNext, we shall state a lemma to the effect that the operation of making type\nschemes in the type environment more type-polymorphic does not decrease the set\n132\nTOFTE AND TALPIN\n\nFile: 643J261325 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3414 Signs: 2513 . Length: 52 pic 10 pts, 222 mm\nof possible translations. Formally, we say that_\n1\nis at least as type-polymorphic as\n_\n2\n, written_\n1\nc\n=\n_\n2\n,if_\n1\nand_\n2\nare identical, or_\n1\nand_\n2\nare both compound\nand_\n1\n=\\:\u0011._\n2\n, for some:\u0011. Furthermore, we writeTE\n1\nc\n=\nTE\n2\nif Dom(TE\n1\n)=\nDom(TE\n2\n) and, for allx# Dom(TE\n1\n), if (_\n1\n,\\\n1\n)=TE\n1\n(x) and (_\n2\n,\\\n2\n)=TE\n2\n(x)\nthen_\n1\nc\n=\n_\n2\nand\\\n1\n=\\\n2\n.\nLemma5.4.If TE|&eOe$:+,. and TE$c\n=\nTE then TE$|&eOe$:+,..\nWe omit the proof, which is a straightforward induction on the depth of inference\nofTE|&eOe$:+,.. We note, however, that the similar statement concerning\nregion polymorphism (replacing_=\\:\u0011=\u0011.{\n\u0014\nby_$=\\\\\u0011:\u0011=\u0011.{\n\u0014\n) is not true, because\napplications of region functions in the target expression can be affected by such a\nchange.\nFortunately, it is precisely the ability to make assumed type schemes more type-\npolymorphic that we need.\n6. USING EFFECTS TO DESCRIBE CONTINUATIONS\nFor the proof of the soundness of the translation scheme, we need to relate the\nvalues of the dynamic semantics of the source and target language. We refer to this\nrelation as theconsistencyrelation.\nSince all values are addresses in the target language semantics, the consistency\nrelation must involve stores. Consistency also naturally depends on types: at type\nint, source level integers can only be consistent with pointers to integers in the\ntarget; at a functional type, only closures can be related, and so on. The region\ninference rules yield expressions, types with places, and effects\u0015\u0015all of which can\ncontain free occurrences of region variables. To relate these region variables to the\nregion names which identify regions at runtime, we need a region environment,R,\nand the following definition:\nDefinition6.1. Aregion environment Rconnects effect.to stores, if frv(.)\u001f\nDom(R) and for all\\# frv(.),R(\\) # Dom(s).\nBased on these considerations, assume that we have defined consistency as a\nrelation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal\nwhereC(R,+,v,s,v$) is read:in region environment R and store s,source value v is con-\nsistent with target value v$at type with place +. The obvious idea would now be some-\nhow to lift this relation first from types with places to type schemes,C(R,_,v,s,v$),\nand then, by pointwise extension, to environments, (R,TE,E,s,VE). We might then\ntry to prove the following statement:\nConjecture6.1.If TE|&eOe$:+,.,and E|&e\u0014v andC(R,TE,e,s,VE)and R\nconnects . to s then there exists a store s$and a target value v$such that s,VE,\nR|&e$\u0014v$,s$andC(R,+,v,s$,v$).\n133\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261326 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3774 Signs: 3146 . Length: 52 pic 10 pts, 222 mm\nHowever, there is a problem with this conjecture. Informally, it states that con-\nsistency is preserved by evaluation. Unfortunately, we cannot expect that to hold!\nTo see what the problem is, consider Example 4.2 once more. According to the\nconjecture, at point (b) we should have that the source language closure\n(y,(*1x,y),[x[(2, 3)])and the closure found in regionr\n5\nare consistent. In\na sense they are consistent: application of the two closures map consistent\narguments to consistent results. But notice that the consistency which used to exist\nbetween the source environment[x[(2, 3)]and its representation in the target\nsemantics was partly destroyed when the regionr\n6\nwas popped from the region\nstack. Thus we see that, intuitively speaking, consistency gradually deteriorates\nduring computation. The saving factor, it turns out, is that there is always enough\nconsistency left for the rest of the computation to succeed, without running into any\nof the inconsistencies!\nTo make these intuitions precise, we need some notion of ``consistency with\nrespect to the rest of the computation.'' One possibility is to work explicitly with\ncontinuations or evaluation contexts. However, we have not explored this\npossibility, since all we need for the purpose of the soundness proof is a very simple\nsummary of which regions are accessed by the rest of the computation. Specifically,\nit suffices to summarise the rest of the computation by an effect,.$, which describes\nwhich of the currently existing regions are accessed by the rest of the computation.\nThus we define a relation\nC\u001fRegEnv_TypeWithPlace_Val_Store_TargetVal_Effect,\nwhereC(R,+,v,s,v$,.$), also writtenC(R,+,v,s,v$) w.r.t..$, is read:at type with\nplace +,in region environment R and store s,source value v is consistent with target\nvalue v$with respect to the effect .$ (where.$ represents the effect of the rest of the\ncomputation). In our example,.$is[put(\\\n3\n),get(\\\n5\n),put(\\\n1\n)], connected via the\nregion environment to regionsr\n3\n,r\n5\nandr\n1\n. The fact that the rest of the computa-\ntion does not access the current contents ofr\n6\nis evident from the fact that no\nregion variable free in.$ is connected tor\n6\n! That is why the environments in the\ntwo closures are consistent with respect to the rest of the computation. The second\nversion of our conjecture becomes:\nConjecture6.2. IfTE|&eOe$:+,.andE|&e\u0014vandC(R,TE,e,s,VE) w.r.t.\n(._.$) andRconnects._.$tosthen there exist a stores$ and a target value\nv$ such thats,VE,R|&e$\u0014v$,s$ andC(R,+,v,s$,v$) w.r.t..$.\nIn other words, if we start out with consistency to cover both the evaluation of\ne$ (whose effect is.) and the rest of the computation (whose effect is.$) then after\nthe computation ofe$, we will have enough consistency left for the rest of the\ncomputation.\nHowever, Conjecture 6.2 is not quite strong enough to be proved by induction.\nConsider a source language closure(x,e,E)and a target closure(x,e$,VE,R),\nwhich we think of as representing(x,e,E). When the source closure is applied, the\nbodyewill be evaluated in an environmentE+[x[v\n2\n], wherev\n2\nis the argument\n134\nTOFTE AND TALPIN\n\nFile: 643J261327 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2770 Signs: 1579 . Length: 52 pic 10 pts, 222 mm\nto the function. Assuming thatv$\n2\nis some target value consistent withv\n2\n, the corre-\nsponding evaluation in the target language takes the forms,VE+[x[v$\n2\n],\nR|&e$\u0014} } } . However, the region environment in whiche$ is evaluated is not\nnecessarily the same as the region environmentR$ which is in force at the point\nwhere the application takes place, for more regions may have been allocated\nsince the closure was created. Moreover,R$ is important for establishing that\nE+[x[v\n2\n]andVE+[x[v$\n2\n]are consistent, sincev\n2\nandv$\n2\nwill be known to\nbe consistent inR$, not inR. And we must establish consistency ofE+[x[v\n2\n]\nandVE+[x[v$\n2\n]in order to use induction to prove that the results of the func-\ntion applications are consistent.\nExample. Consider the target expression\nletregion\\\n1\nin let x = 3 at\\\n1\nin letregion\\\n2\nin let f=(*y.(x+y)at\\\n0\n)at\\\n2\nin letregion\\\n3\nin f(4at\\\n3\n)\nend\nend\nend\nend\nend\nConsider the point of the evaluation just after the closure forfhas been created.\nLet us say that the region environment isR\n1\n=[\\\n0\n[r\n0\n,\\\n1\n[r\n1\n,\\\n2\n[r\n2\n]. Then\nthe store is\ns\n1\n=[r\n0\n[[],r\n1\n[[o\nx\n[3],r\n2\n[\n[o\nf\n[(y,(x+y)at\\\n0\n,[x[(r\n1\n,o\nx\n)],R\n1\n)].\nWe can reasonably expect to have\nC(R\n1\n,[x[(int,\\\n1\n)],[x[3],s\n1\n,[x[(r\n1\n,o\nx\n)]) w.r.t..\n1\n,(29)\nwhere.\n1\n=[get(\\\n1\n),get(\\\n2\n),put(\\\n0\n)], which is the net effect of the remainder of\nthe computation at that point. (``Expect'' because we have not definedCyet.) Next,\nconsider the point where the actual argument 4 tofhas been stored, the closure\nforfhas been fetched and we are just about to evaluate the body off. Now the\n135\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261328 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3585 Signs: 2629 . Length: 52 pic 10 pts, 222 mm\nregion environment has becomeR\n2\n=R\n1\n+[\\\n3\n[r\n3\n], the store has become\ns\n2\n=s\n1\n+[r\n3\n[[o\n4\n[4]]and we can reasonably expect to have\nC(R\n2\n,(int,\\\n3\n), 4, s\n2\n,(r\n3\n,o\n4\n)) w.r.t..\n2\n,(30)\nwhere.\n2\n=[get(\\\n1\n),get(\\\n3\n),put(\\\n0\n)], i.e., the effect of the continuation at that\npoint. From (29) and (30) we can reasonably expect to obtain\nC(R\n2\n,[x[(int,\\\n1\n),y[(int,\\\n3\n)]\n[x[3,y[4],s\n2\n,[x[(r\n1\n,o\nx\n),y[(r\n3\n,o\n4\n)]) w.r.t..\n2\nBut evaluation of the function body is going to take place inR\n1\n(see Rule 12). Thus\nthe theorem needs to be strong enough to handle the situation that the region\nenvironment in which consistency is established is not the same as the region\nenvironment in which the expression is evaluated. Incidentally, this is similar to the\nsituation in block-structured languages, where an an inner block can call a function\ndeclared in an enclosing block. (Indeed, it appears that although the variable\nenvironments do not obey a stack discipline, the region environments do.)\nWe therefore prove that the theorem holds not just forRbut also for other\nregion environmentsR$ which ``agree'' withR:\nDefinition6.2. LetRandR$ be region environments and let.be an effect. We\nsay thatRandR$ agree on.,ifRafrv(.)=R$afrv(.).\nWe are now able to state the main theorem, which we shall prove, once we have\ndefined the consistency relation:\nTheorem6.1.If TE|&eOe$:+,. andC(R,TE,E,s,VE) w.r.t.._.$and\nE|&e\u0014v and R connects ._.$to s and R$and R agree on ._.$and\nfrv(e$ )\u001fDomR$then there exist s$and v$such that s,VE,R$|&e$\u0014v$,s$and\nC(R$,+,v,s$,v$ ) w.r.t..$.\nThe premise ``frv(e$ ) \u001fDomR$ '' is included only to make the proof simpler; it helps\nto ensure that closures in the target language will not contain free region variables.\nNote that we use the effect of the rest of the computation as an approximation\nto what data is ``live.'' The notion usually employed by garbage collectors (namely\nthat data is live, if it is reachable in the memory graph) is incomparable: we have\nalready seen that data which is reachable in the memory graph is actually dead and\ncan be de-allocated using region inference; conversely, sometimes data which we\nkeep alive in a region is not actually used by the rest of the computation and a\ngarbage collector would detect it.\n7. CONSISTENCY\nFor simplicity, we first present the consistency relation in the form of inference\nrules without reference to the underlying mathematics. We shall later explain that\nthe rules can be viewed as describing a maximal fixed point of a certain monotonic\noperator. For now, it suffices to read the rules as follows: the conclusion of a rule\nholds if and only if the premises hold.\n136\nTOFTE AND TALPIN\n\nFile: 643J261329 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3424 Signs: 2723 . Length: 52 pic 10 pts, 222 mm\nRules 31\u001535 characterize consistency between source values and storable target\nvaluessv(defined in Section 4.1). These rules are used in Rules 36 and 37, to\ncharacterize consistency between source and target values (recall that target values\nare addresses). It is precisely in rules Rule 36 and 37 we see the significance of the\nidea of representing the rest of the computation by the effect.:ifget(\\)\u0012., then\nany claim about consistency of values at region\\is allowed, for\\then denotes\n``garbage''. However, by Rule 36, ifv$=(r,o) # Pdom(s) andr=R(\\) then the value\nstored at addressv$ has to be consistent with the source value,v, as described\nby Rules 34 and 35. (Recall that (r,o) # Pdom(s) abbreviatesr# Dom(s)7\no# Dom(s(r)).) Rule 38 says that consistency of environments is the pointwise\nextension of consistency of values.\nRule 31 should be straightforward. In Rule 32, note thatTEdoes not occur in the\nconclusion of the rule: one has to ``invent'' aTEwhich can justify the target expres-\nsion as a compilation result of the source expression. Also, the environmentsEand\nVEmust be consistent atTE. The region environmentRmay be regarded as the\nregion environment which is in force when the closures are applied; as we saw\nearlier, this is not necessarily the same as the region environment which was in\nforce when the target closure was created (R$ in the rule). For the purpose of the\nsoundness theorem, we clearly need to know thatRandR$ are related somehow,\nand it turns out that it suffices to require that they agree on.. The condition\nfrv(e$)\u001f(R$) ensures that the target closure contains no free region variables; the\ntwo first premises of the rule already ensure that fpv(e$ )\u001fDom(VE), i.e., that the\nclosure contains no free program variables. Again this is good hygiene, which is\nuseful in the proofs (specifically of Lemma 8.3).\nRule 33 is similar to Rule 32, but deals with recursion. For the premises to be\nsatisfied,TEmush havefin its domain. Moreover, since recursion is handled by\nunfolding in the source language semantics, it isE+[f[(x,e,E,f)]andVE\nthat have to be consistent, rather than justEandVE.\nRule 34 is similar to Rule 33, but it relates recursive closures and region function\nclosures at compound type schemes. For simple type schemes, one uses Rule 35\ntogether with Rules 31\u001533.\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..].\ni#Int\nC(R,(int,\\),i,s,i) w.r.t..\n(31)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E,s,VE) w.r.t..\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\nC(R,({,\\),(x,e,E),s,(x,e$,VE,R$)) w.r.t..\n(32)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\nC(R$,TE,E+[f[(x,e,E,f)],s,VE) w.r.t..\nR$ andRagree on.frv(e$ )\u001fDom(R$)\nC(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$))) w.r.t..\n(33)\n137\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261330 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2940 Signs: 1754 . Length: 52 pic 10 pts, 222 mm\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{\n\u0014\nbv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ )\u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\nC(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE) w.r.t..\nC(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$)) w.r.t..\n(34)\nC(R,({,\\),v,s,sv) w.r.t..\nC(R,(\\().{,\\),v,s,sv) w.r.t..\n(35)\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..].\nv$=(r,o)R(\\)=rv$ # Pdom(s)C(R,(_,\\),v,s,s(v$ )) w.r.t..\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(36)\nget(\\)\u0012.\nC(R,(_,\\),v,s,v$ ) w.r.t..\n(37)\nEnvironments[C(R,TE,E,s,VE) w.r.t..].\nDomTE=DomE=DomVE\n\\x# DomTE.C(R,TE(x),E(x),s,VE(x)) w.r.t..\nC(R,TE,E,s,VE) w.r.t..\n(38)\nThe relationCis defined as the maximal fixed point of an operatorF:P(C)\u0014\nP(C), wherePmeans powerset andCis defined by:\nC=RegEnv_TypeWithPlace_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_StoreVal_Effect\n_RegEnv_(TypeScheme_RegVar)_Val_Store_TargetVal_Effect\n_RegEnv_TyEnv_Env_Store_TargetEnv_Effect.\nThe members ofCare referred to as (consistency)claims. We use#to range over\nclaims and1to range over sets of claims. For example, a claim of the form\n(R,(_,\\),v,s,sv,.) is read: (it is claimed that) storable valuesvis consistent with\nsource valuevand has type scheme_and resides at\\in the storesand region\nenvironmentR, with respect to effect..\nNote that (P(C), \u001f) is a complete lattice. We now define an operator\nF:P(C)\u0014P(C). The definition is expressed using the syntax of inference rules,\nbut it could equally well be expressed as a non-recursive definition by cases; for\ngiven1\u001fC,F(1) is defined as the unique set[##C|##F(1) can be inferred by\none of the inference rules]. Since the rules are very similar to rules 31\u001538 we shall\nnot explain them further.\n138\nTOFTE AND TALPIN\n\nFile: 643J261331 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 2699 Signs: 1330 . Length: 52 pic 10 pts, 222 mm\nTypes and Storable Values[(R,+,s,sv,.)#F(1)].\ni#Int\n(R,(int,\\),i,s,i,.)#F(1)\n(39)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E,s,VE,.)#1\nR$ andRagree on.frv(e$ )\u001fDom(R)\n(R,({,\\),(x,e,E),s,(x,e$,VE,R$),.)#F(1)\n(40)\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n(R$,TE,E+[f[(x,e,E,f)],s,VE,.)#1\nR$ andRagree on.frv(e$ ) \u001fDom(R$)\n(R,({,\\),(x,e,E,f),s,(x,e$,VE,R$),.)#F(1)\n(41)\nType Schemes and Storable Values[(R,(_,\\),v,s,sv,.)#F(1)].\nTE+[f[(_,\\)]|&*x.eO*x.e$at\\:({,\\),[put(\\)]\n_=\\\\\n1\n}}}\\\nk\n:\n1\n}}}:\nn\n=\n1\n}}}=\nm\n.{bv(_)&fv(TE,\\)=<\nR$ andRagree on.frv(e$ ) \u001fDom(R$)_[\\\n1\n, ...,\\\nk\n]\n(R$,TE+[f[(_,\\)],E+[f[(x,e,E,f)],s,VE,.)#1\n(R,(_,\\),(x,e,E,f),s,(\\\n1\n, ...,\\\nk\n,x,e$,VE,R$),.)#F(1)\n(42)\n(R,({,\\),v,s,sv,.)#1\n(R,(\\().{,\\),v,s,sv,.)#F(1)\n(43)\nType Schemes and Addresses[(R,(_,\\),v,s,v$,.)#F(1)].\nv$=(r,o)R(\\)=rv$ # Pdom(s)(R,(_,\\),v,s,s(v$),.)#1\n(R,(_,\\),v,s,v$,.)#F(1)\n(44)\nget(\\)\u0012.\n(R,(_,\\),v,s,v$,.)#F(1)\n(45)\nEnvironments[(R,TE,E,s,VE,.)#F(1)].\nDomTE=DomE=DomVE\n\\x# DomTE.(R,TE(x),E(x),s,VE(x),.)#1\n(R,TE,E,s,VE,.)#F(1)\n(46)\nThe operatorFis monotonic:1\u001f1$ impliesF(1)\u001fF(1$ ). Thus, by Tarski's\nfixed point theorem, there exists a greatest fixed point forFand this greatest fixed\npoint is also the greatest set1satisfying1\u001fF(1). Let1\n*\nbe this greatest fixed\npoint.\nDefinition7.1. We takeCto be1\n*\nand we write, for example,C(R,+,v,s,v$)\nw.r.t..to mean (R,+,v,s,v$,.)#C.\n139\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261332 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3395 Signs: 2587 . Length: 52 pic 10 pts, 222 mm\nWe use co-induction to prove properties of the consistency relation: to prove that\na set1of claims is consistent, (i.e., that1\u001f1\n*\n) it suffices to prove1\u001fF(1).\n8. PROPERTIES OF CONSISTENCY\nIn this section we prove important lemmas about the consistency relationC.\nBesides being useful in the proof of the main theorem (Theorem 6.1) they address\nissues such as why it is safe to re-use a de-allocated region even when there are\ndead pointers into it. The lemmas will be proved using a special style of co-induc-\ntive proof, which we call rule-based co-induction.\n8.1. Rule-Based Co-induction\nRule-based co-inductive proof is a style of proof which makes it possible to pre-\nsent a co-inductive proof in a form which resembles ordinary induction on depth\nof inference. The scenario is that a set,C, is given, together with an operator\nF:P(C)\u0014P(C) which is monotonic with respect to set inclusion.Fis defined by\na finite set of inference rules (in our case, Rules 39\u001546). Let1\n*\nbe the maximal\nfixed point ofF:1\n*\n=\u001a[1\u001fC|1\u001fF(1)]. Now consider a lemma which states\nthat, for some given relationR\u001fC_C:\n\\#,#$#Cif##1\n*\nand#R#$ then#$#1\n*\n.(47)\nLet1\nR\n=[#$#C|_##1\n*\n.#R#$]. We refer formally to the members#$of1\nR\nas the\nconsequencesof the lemma. Then (47) can be stated1\nR\n\u001f1\n*\n. By the principle of\nco-induction, it suffices to prove1\nR\n\u001fF(1\nR\n), i.e., that\n\\#$#Cif there exists##1\n*\nsuch that#R#$ then#$#F(1\nR\n).\nThus the co-inductive proof can be organised as follows: take any#$#C. Let##1\n*\nbe such that#R#$. Show#$#F(1\nR\n), i.e.,show that #$can be inferred by the inference\nrules that defineF,using only premises which are themselves consequences of the\nlemma. Often, this is proved by a case analysis on#(note: not#$ ), since##1\n*\nimplies that#can be inferred by an application of one of the rules that defineF\nfrom premises which are themselves in1\n*\n. Note that proving#$#F(1\nR\n) is equiv-\nalent to inferring#$#1\n*\n, using the fixed-point rules forF(in our case:\nRules 31\u001538) and only using premises#\ni\n$ which are themselves consequences of the\nlemma (i.e.,\\i_#\ni\n#1\n*\n.#\ni\nR#\ni\n$). Thus we can word the co-inductive proof almost as\nif it were a normal inductive proof on the depth of inference related to mininal fixed\npoints, using the fixed point rules forFrather than the rules that defineF.\nWe name this style of co-inductive proofrule-based co-induction. We emphasise\nthat a rule-based co-inductive proof isnota proof on ``depth of inference''\u0015\u0015for the\nco-inductive proof establishes claims that are not conclusions of any finite proof\ntree constructed by the fixed point rules.\n140\nTOFTE AND TALPIN\n\nFile: 643J261333 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3101 Signs: 2084 . Length: 52 pic 10 pts, 222 mm\n8.2. Preservation of Consistency\nThe first lemma states that consistency is preserved under decreasing effect and\nincreasing store. This is to be expected: it is easier to obtain consistency with\nrespect to an observer if the observer observes a little rather than a lot; and the\nlarger the store is, the easier it is for it to contain bits of target values which are\nconsistent with a given source value.\nLemma8.1.IfC(R,+,v,s\n1\n,v$ ) w.r.t..\n1\nand.\n2\n\u001f.\n1\nands\n1\nC\n=\ns\n2\nthen\nC(R,+,v,s\n2\n,v$ ) w.r.t..\n2\n.\nLemma 8.1 is a special case of the following lemma:\nLemma8.2.IfC(R\n1\n,+,v,s\n1\n,v$ ) w.r.t..\n1\nand .\n2\n\u001f.\n1\nand R\n2\nand R\n1\nagree on\n.\n2\nand s\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\nthenC(R\n2\n,+,v,s\n2\n,v$ ) w.r.t..\n2\n.Similarly for\nthe other forms ofC.\nNotice that the domain ofs\n1\nneed not be a subset of the domain ofs\n2\nfor\nLemma 8.2 to apply. This is crucial in the proof of the main theorem, in the case\nforletregion. Heres\n1\nwill be the store resulting from a computation which\ninvolves local regions;s\n2\nwill be the result of removing the local regions froms\n1\n.\nThe region variables that are free in.\n1\n, but not in.\n2\n, will be the variables of the\nlocal regions.\nProof.We prove Lemma 8.2 and the corresponding statements concerning the\nother forms of consistency by rule-based co-induction. The cases for the inference\nrules (31) to (38) are arranged according to judgement forms. In all cases, we\nassume\n.\n2\n\u001f.\n1\n(48)\nR\n2\nandR\n1\nagree on.\n2\n(49)\ns\n1\na(Rng(R\n2\nafrv(.\n2\n)))C\n=\ns\n2\n(50)\nTypes and Storable Values[C(R,+,v,s,sv) w.r.t..]. Assume\nC(R\n1\n,+,v,s\n1\n,sv) w.r.t..\n1\n.(51)\nBy the remarks in Section 8 it suffices to prove thatC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\ncan\nbe inferred using Rules 31\u001538, from premises which are themselves conclusions of\nthe lemma.\nRecall that Rules 31\u001538 express thatCis a fixed-point ofF: one has (51) if and\nonly if either the ``premises'' (i.e., the formulae above the line) of Rule 31 hold, or\nthe premises of Rule 32 hold, or the premises of Rule 33 hold. We deal with each\ncase in turn:\n[Rule 31].Here+=(int,\\), for some\\, andv=sv=i, for somei# Int. But\nthenC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, by Rule 31.\n141\nREGION-BASED MEMORY MANAGEMENT\n\nFile: 643J261334 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3153 Signs: 1750 . Length: 52 pic 10 pts, 222 mm\n[Rule 32].Here there exist{,\\,TE,x,e,E,e$,VE,R$ such that (51) is inferred\nfrom premises\nTE|&*x.eO*x.e$at\\:({,\\),[put(\\)](52)\nC(R$,TE,E,s\n1\n,VE) w.r.t..\n1\n(53)\nR$ andR\n1\nagree on.\n1\nfrv(e$ )\u001fDom(R$)(54)\nand+=({,\\),v=(x,e,E), andsv=(x,e$,VE,R$). But then, by (54), (48) and\n(49) we have\nR$ andR\n2\nagree on.\n2\n.(55)\nObviously,R$ agrees with itself on.\n2\nand, by (55) and (50),s\n1\na(Rng(R$afrv(.\n2\n)))\nC\n=\ns\n2\n. Thus, using also (48) and (53), we have that the claim\nC(R$,TE,E,s\n2\n,VE) w.r.t..\n2\n(56)\nis a consequence of the lemma.\n2\nThus by Rule 32 on (52), (55) and (56) we have\nC(R\n2\n,+,v,s\n2\n,sv) w.r.t..\n2\n, as desired (since (56) is a consequence of the lemma).\n[Rule 33].Similar to the previous case.\nType Schemes and Storable Values[C(R,(_,\\),v,s,sv) w.r.t..].Assume\nC(R\n1\n,(_,\\),v,s\n1\n,sv) w.r.t..\n1\n, which can be inferred by Rule 34 or by Rule 35. The\ncase for Rule 34 is similar to the case for Rule 32. So consider the case for Rule 35.\nHere_takes the form\\().{and we haveC(R\n1\n,({,\\),v,s\n1\n,sv) w.r.t..\n1\n. Thus the\nclaimC(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t.\n2\nis a consequence of the lemma. But then, by\nRule 35, we haveC(R\n2\n,(_,\\),v,s\n2\n,sv) w.r.t..\n2\n, as required (since the premise\nused, i.e.,C(R\n2\n,({,\\),v,s\n2\n,sv) w.r.t..\n2\n, is a consequence of the lemma).\nType Schemes and Addresses[C(R,(_,\\),v,s,v$ ) w.r.t..]. Assume that\nC(R\n1\n,(_,\\),v,s\n1\n,v$ ) w.r.t..\n1\n(57)\ninferred by Rule 36 or Rule 37. Case analysis:\n[get(\\)#.\n2\n] Thenget(\\)#.\n1\n, so by (36) there existr,osuch thatv$=(r,o)\nand\nR\n1\n(\\)=r(58)\nv$ # Pdom(s\n1\n)(59)\nC(R\n1\n,(_,\\),v,s\n1\n,s\n1\n(v$ )) w.r.t..\n1\n.(60)\nBy (49) on (58) we have\nR\n2\n(\\)=r(61)\n142\nTOFTE AND TALPIN\n2\nStrictly speaking, we should say ``we have that the claim (R$,TE,E,s\n2\n,VE,.\n2\n) is a consequence\nof the lemma'', but the chosen formulation seems easier to read, so we adopt it throughout.\n\nFile: 643J261335 . By:CV . Date:20:03:97 . Time:13:02 LOP8M. V8.0. Page 01:01\nCodes: 3240 Signs: 2227 . Length: 52 pic 10 pts, 222 mm\nThus (59) and (50) give\nv$ # Pdom(s\n2\n)ands\n2\n(v$)=s\n1\n(v$ ).(62)\nBy (60), (48), (49) and (50) we have that the claimC(R\n2\n,(_,\\),v,s\n2\n,\ns\n1\n(v$ )) w.r.t..\n2\nis a consequence of the lemma; i.e., by (62), that the claim\nC(R\n2\n,(_,\\),v,s\n2\n,s\n2\n(v$ )) w.r.t..\n2\n(63)\nis a consequence of the lemma. Thus Rule 36 on (61), (62), and (63) gives\nC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\n, since the premise used is a consequences of the\nlemma.\n[get(\\)\u0012.\n2\n].ThenC(R\n2\n,(_,\\),v,s\n2\n,v$ ) w.r.t..\n2\nby Rule 37.\nEnvironments[C(R,TE,E,s,VE) w.r.t..].The case for Rule 38 is straight-\nforward.\n8.3. Region Renaming\nIn order to prove that re-use of old regions is safe (Lemma 8.4), we shall want\nto rename region variables that occur free in some semantic objectAbut do not\noccur free in the effect of the rest of the computation, to other region variables that\ndo not occur free in the effect of the rest of the computation. LetS\nr\nbe a region sub-\nstitution. TheyieldofS\nr\n, written Yield(S\nr\n), is the set[S\nr\n(\\)|\\# Supp(S\nr\n)].\nDefinition8.1. LetAbe a semantic object, let.be an effect, and let\nS=(S\nt\n,S\nr\n,S\ne\n) be a substitution. We say thatSisaregion renaming ofAwith\nrespect to.ifSafrv(A) is injective, (Supp(S\nr\n)_Yield(S\nr\n))&frv(.)=3% over\nVGG-16. This gain is solely because of the improved fea-\ntures learned by ResNet.\nMS COCO\nThe MS COCO dataset [26] involves 80 object cate-\ngories. We evaluate the PASCAL VOC metric (mAP @\nIoU = 0.5) and the standard COCO metric (mAP @ IoU =\n.5:.05:.95). We use the 80k images on the train set for train-\ning and the 40k images on the val set for evaluation. Our\ndetection system for COCO is similar to that for PASCAL\nVOC. We train the COCO models with an 8-GPU imple-\nmentation, and thus the RPN step has a mini-batch size of\n8 images (i.e., 1 per GPU) and the Fast R-CNN step has a\nmini-batch size of 16 images. The RPN step and Fast R-\nCNN step are both trained for 240k iterations with a learn-\ning rate of 0.001 and then for 80k iterations with 0.0001.\nTable 8 shows the results on the MS COCO validation\nset. ResNet-101 has a 6% increase of mAP@[.5, .95] over\nVGG-16, which is a 28% relative improvement, solely con-\ntributed by the features learned by the better network. Re-\nmarkably, the mAP@[.5, .95]’s absolute increase (6.0%) is\nnearly as big as mAP@.5’s (6.9%). This suggests that a\ndeeper network can improve both recognition and localiza-\ntion.\nB. Object Detection Improvements\nFor completeness, we report the improvements made for\nthe competitions. These improvements are based on deep\nfeatures and thus should benefit from residual learning.\nMS COCO\nBox refinement.Our box refinement partially follows the it-\nerative localization in [6]. In Faster R-CNN, the final output\nis a regressed box that is different from its proposal box. So\nfor inference, we pool a new feature from the regressed box\nand obtain a new classification score and a new regressed\nbox. We combine these 300 new predictions with the orig-\ninal 300 predictions. Non-maximum suppression (NMS) is\napplied on the union set of predicted boxes using an IoU\nthreshold of 0.3 [8], followed by box voting [6]. Box re-\nfinement improves mAP by about 2 points (Table 9).\nGlobal context.We combine global context in the Fast\nR-CNN step. Given the full-image conv feature map, we\npool a feature by global Spatial Pyramid Pooling [12] (with\na “single-level” pyramid) which can be implemented as\n“RoI” pooling using the entire image’s bounding box as the\nRoI. This pooled feature is fed into the post-RoI layers to\nobtain a global context feature. This global feature is con-\ncatenated with the original per-region feature, followed by\nthe sibling classification and box regression layers. This\nnew structure is trained end-to-end. Global context im-\nproves mAP@.5 by about 1 point (Table 9).\nMulti-scale testing.In the above, all results are obtained by\nsingle-scale training/testing as in [32], where the image’s\nshorter side iss= 600pixels. Multi-scale training/testing\nhas been developed in [12, 7] by selecting a scale from a\nfeature pyramid, and in [33] by using maxout layers. In\nour current implementation, we have performed multi-scale\ntestingfollowing [33]; we have not performed multi-scale\ntraining because of limited time. In addition, we have per-\nformed multi-scale testing only for the Fast R-CNN step\n(but not yet for the RPN step). With a trained model, we\ncompute conv feature maps on an image pyramid, where the\nimage’s shorter sides ares∈ {200,400,600,800,1000}.\n10\n\ntraining dataCOCO trainCOCO trainval\ntest dataCOCO valCOCO test-dev\nmAP@.5@[.5, .95]@.5@[.5, .95]\nbaseline Faster R-CNN (VGG-16)41.521.2\nbaseline Faster R-CNN (ResNet-101)48.427.2\n+box refinement49.929.9\n+context51.130.053.332.2\n+multi-scale testing53.832.555.734.9\nensemble59.037.4\nTable 9. Object detection improvements on MS COCO using Faster R-CNN and ResNet-101.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607+1273.276.5 79.0 70.9 65.5 52.1 83.1 84.7 86.4 52.0 81.9 65.7 84.8 84.6 77.5 76.7 38.8 73.6 73.9 83.0 72.6\nbaselineResNet-10107+1276.479.8 80.7 76.2 68.3 55.9 85.1 85.389.856.7 87.8 69.4 88.3 88.9 80.9 78.4 41.7 78.6 79.8 85.3 72.0\nbaseline+++ResNet-101COCO+07+1285.690.0 89.6 87.8 80.8 76.1 89.9 89.989.675.5 90.0 80.7 89.6 90.3 89.1 88.7 65.4 88.1 85.6 89.0 86.8\nTable 10. Detection results on the PASCAL VOC 2007 test set. The baseline is the Faster R-CNN system. The system “baseline+++”\ninclude box refinement, context, and multi-scale testing in Table 9.\nsystemnetdatamAPareobikebirdboatbottlebuscarcatchaircowtabledoghorse mbike person plantsheepsofatraintv\nbaselineVGG-1607++1270.484.9 79.8 74.3 53.9 49.8 77.5 75.9 88.5 45.6 77.1 55.3 86.9 81.7 80.9 79.6 40.1 72.6 60.9 81.2 61.5\nbaselineResNet-10107++1273.886.5 81.6 77.2 58.0 51.0 78.6 76.6 93.2 48.6 80.4 59.0 92.1 85.3 84.8 80.7 48.1 77.3 66.5 84.7 65.6\nbaseline+++ResNet-101COCO+07++1283.892.1 88.4 84.8 75.9 71.4 86.3 87.8 94.2 66.8 89.4 69.2 93.9 91.9 90.9 89.6 67.9 88.2 76.8 90.3 80.0\nTable 11. Detection results on the PASCAL VOC 2012 test set (http://host.robots.ox.ac.uk:8080/leaderboard/\ndisplaylb.php?challengeid=11&compid=4). The baseline is the Faster R-CNN system. The system “baseline+++” include\nbox refinement, context, and multi-scale testing in Table 9.\nWe select two adjacent scales from the pyramid following\n[33]. RoI pooling and subsequent layers are performed on\nthe feature maps of these two scales [33], which are merged\nby maxout as in [33]. Multi-scale testing improves the mAP\nby over 2 points (Table 9).\nUsing validation data.Next we use the 80k+40k trainval set\nfor training and the 20k test-dev set for evaluation. The test-\ndev set has no publicly available ground truth and the result\nis reported by the evaluation server. Under this setting, the\nresults are an mAP@.5 of 55.7% and an mAP@[.5, .95] of\n34.9% (Table 9). This is our single-model result.\nEnsemble.In Faster R-CNN, the system is designed to learn\nregion proposals and also object classifiers, so an ensemble\ncan be used to boost both tasks. We use an ensemble for\nproposing regions, and the union set of proposals are pro-\ncessed by an ensemble of per-region classifiers. Table 9\nshows our result based on an ensemble of 3 networks. The\nmAP is 59.0% and 37.4% on the test-dev set.This result\nwon the 1st place in the detection task in COCO 2015.\nPASCAL VOC\nWe revisit the PASCAL VOC dataset based on the above\nmodel. With the single model on the COCO dataset (55.7%\nmAP@.5 in Table 9), we fine-tune this model on the PAS-\nCAL VOC sets. The improvements of box refinement, con-\ntext, and multi-scale testing are also adopted. By doing so\nval2test\nGoogLeNet [44] (ILSVRC’14)-43.9\nour single model (ILSVRC’15)60.558.8\nour ensemble (ILSVRC’15)63.662.1\nTable 12. Our results (mAP, %) on the ImageNet detection dataset.\nOur detection system is Faster R-CNN [32] with the improvements\nin Table 9, using ResNet-101.\nwe achieve 85.6% mAP on PASCAL VOC 2007 (Table 10)\nand 83.8% on PASCAL VOC 2012 (Table 11)\n6\n. The result\non PASCAL VOC 2012 is 10 points higher than the previ-\nous state-of-the-art result [6].\nImageNet Detection\nThe ImageNet Detection (DET) task involves 200 object\ncategories. The accuracy is evaluated by mAP@.5. Our\nobject detection algorithm for ImageNet DET is the same\nas that for MS COCO in Table 9. The networks are pre-\ntrained on the 1000-class ImageNet classification set, and\nare fine-tuned on the DET data. We split the validation set\ninto two parts (val1/val2) following [8]. We fine-tune the\ndetection models using the DET training set and the val1\nset. The val2 set is used for validation. We do not use other\nILSVRC 2015 data. Our single model with ResNet-101 has\n6\nhttp://host.robots.ox.ac.uk:8080/anonymous/3OJ4OJ.html,\nsubmitted on 2015-11-26.\n11\n\nLOC\nmethod\nLOC\nnetwork\ntesting\nLOC error\non GT CLS\nclassification\nnetwork\ntop-5 LOC error\non predicted CLS\nVGG’s [41]VGG-161-crop33.1 [41]\nRPNResNet-1011-crop13.3\nRPNResNet-101dense11.7\nRPNResNet-101denseResNet-10114.4\nRPN+RCNNResNet-101denseResNet-10110.6\nRPN+RCNN\nensembledenseensemble8.9\nTable 13. Localization error (%) on the ImageNet validation. In\nthe column of “LOC error on GT class” ([41]), the ground truth\nclass is used. In the “testing” column, “1-crop” denotes testing\non a center crop of 224×224 pixels, “dense” denotes dense (fully\nconvolutional) and multi-scale testing.\n58.8% mAP and our ensemble of 3 models has 62.1% mAP\non the DET test set (Table 12).This result won the 1st place\nin the ImageNet detection task in ILSVRC 2015, surpassing\nthe second place by8.5 points(absolute).\nC. ImageNet Localization\nThe ImageNet Localization (LOC) task [36] requires to\nclassify and localize the objects. Following [40, 41], we\nassume that the image-level classifiers are first adopted for\npredicting the class labels of an image, and the localiza-\ntion algorithm only accounts for predicting bounding boxes\nbased on the predicted classes. We adopt the “per-class re-\ngression” (PCR) strategy [40, 41], learning a bounding box\nregressor for each class. We pre-train the networks for Im-\nageNet classification and then fine-tune them for localiza-\ntion. We train networks on the provided 1000-class Ima-\ngeNet training set.\nOur localization algorithm is based on the RPN frame-\nwork of [32] with a few modifications. Unlike the way in\n[32] that is category-agnostic, our RPN for localization is\ndesigned in aper-classform. This RPN ends with two sib-\nling 1×1 convolutional layers for binary classification (cls)\nand box regression (reg), as in [32]. Theclsandreglayers\nare both in aper-classfrom, in contrast to [32]. Specifi-\ncally, theclslayer has a 1000-d output, and each dimension\nisbinary logistic regressionfor predicting being or not be-\ning an object class; thereglayer has a 1000×4-d output\nconsisting of box regressors for 1000 classes. As in [32],\nour bounding box regression is with reference to multiple\ntranslation-invariant “anchor” boxes at each position.\nAs in our ImageNet classification training (Sec. 3.4), we\nrandomly sample 224×224 crops for data augmentation.\nWe use a mini-batch size of 256 images for fine-tuning. To\navoid negative samples being dominate, 8 anchors are ran-\ndomly sampled for each image, where the sampled positive\nand negative anchors have a ratio of 1:1 [32]. For testing,\nthe network is applied on the image fully-convolutionally.\nTable 13 compares the localization results. Following\n[41], we first perform “oracle” testing using the ground truth\nclass as the classification prediction. VGG’s paper [41] re-\nmethod\ntop-5 localization err\nvaltest\nOverFeat [40] (ILSVRC’13)30.029.9\nGoogLeNet [44] (ILSVRC’14)-26.7\nVGG [41] (ILSVRC’14)\n26.925.3\nours (ILSVRC’15)8.99.0\nTable 14. Comparisons of localization error (%) on the ImageNet\ndataset with state-of-the-art methods.\nports a center-crop error of 33.1% (Table 13) using ground\ntruth classes. Under the same setting, our RPN method us-\ning ResNet-101 net significantly reduces the center-crop er-\nror to 13.3%. This comparison demonstrates the excellent\nperformance of our framework. With dense (fully convolu-\ntional) and multi-scale testing, our ResNet-101 has an error\nof 11.7% using ground truth classes. Using ResNet-101 for\npredicting classes (4.6% top-5 classification error, Table 4),\nthe top-5 localization error is 14.4%.\nThe above results are only based on theproposal network\n(RPN) in Faster R-CNN [32]. One may use thedetection\nnetwork(Fast R-CNN [7]) in Faster R-CNN to improve the\nresults. But we notice that on this dataset, one image usually\ncontains a single dominate object, and the proposal regions\nhighly overlap with each other and thus have very similar\nRoI-pooled features. As a result, the image-centric training\nof Fast R-CNN [7] generates samples of small variations,\nwhich may not be desired for stochastic training. Motivated\nby this, in our current experiment we use the original R-\nCNN [8] that is RoI-centric, in place of Fast R-CNN.\nOur R-CNN implementation is as follows. We apply the\nper-class RPN trained as above on the training images to\npredict bounding boxes for the ground truth class. These\npredicted boxes play a role of class-dependent proposals.\nFor each training image, the highest scored 200 proposals\nare extracted as training samples to train an R-CNN classi-\nfier. The image region is cropped from a proposal, warped\nto 224×224 pixels, and fed into the classification network\nas in R-CNN [8]. The outputs of this network consist of two\nsibling fc layers forclsandreg, also in a per-class form.\nThis R-CNN network is fine-tuned on the training set us-\ning a mini-batch size of 256 in the RoI-centric fashion. For\ntesting, the RPN generates the highest scored 200 proposals\nfor each predicted class, and the R-CNN network is used to\nupdate these proposals’ scores and box positions.\nThis method reduces the top-5 localization error to\n10.6% (Table 13). This is our single-model result on the\nvalidation set. Using an ensemble of networks for both clas-\nsification and localization, we achieve a top-5 localization\nerror of 9.0% on the test set. This number significantly out-\nperforms the ILSVRC 14 results (Table 14), showing a 64%\nrelative reduction of error.This result won the 1st place in\nthe ImageNet localization task in ILSVRC 2015.\n12", + "dataFromArxiv": { + "id": "http://arxiv.org/abs/1512.03385v1", + "updated": "2015-12-10T19:51:55Z", + "published": "2015-12-10T19:51:55Z", + "title": "Deep Residual Learning for Image Recognition", + "summary": " Deeper neural networks are more difficult to train. We present a residual\nlearning framework to ease the training of networks that are substantially\ndeeper than those used previously. We explicitly reformulate the layers as\nlearning residual functions with reference to the layer inputs, instead of\nlearning unreferenced functions. We provide comprehensive empirical evidence\nshowing that these residual networks are easier to optimize, and can gain\naccuracy from considerably increased depth. On the ImageNet dataset we evaluate\nresidual nets with a depth of up to 152 layers---8x deeper than VGG nets but\nstill having lower complexity. An ensemble of these residual nets achieves\n3.57% error on the ImageNet test set. This result won the 1st place on the\nILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100\nand 1000 layers.\n The depth of representations is of central importance for many visual\nrecognition tasks. Solely due to our extremely deep representations, we obtain\na 28% relative improvement on the COCO object detection dataset. Deep residual\nnets are foundations of our submissions to ILSVRC & COCO 2015 competitions,\nwhere we also won the 1st places on the tasks of ImageNet detection, ImageNet\nlocalization, COCO detection, and COCO segmentation.\n", + "author": [ + { + "name": "Kaiming He" + }, + { + "name": "Xiangyu Zhang" + }, + { + "name": "Shaoqing Ren" + }, + { + "name": "Jian Sun" + } + ], + "arxiv:comment": { + "_": "Tech report", + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom" + } + }, + "link": [ + { + "$": { + "href": "http://arxiv.org/abs/1512.03385v1", + "rel": "alternate", + "type": "text/html" + } + }, + { + "$": { + "title": "pdf", + "href": "http://arxiv.org/pdf/1512.03385v1", + "rel": "related", + "type": "application/pdf" + } + } + ], + "arxiv:primary_category": { + "$": { + "xmlns:arxiv": "http://arxiv.org/schemas/atom", + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + }, + "category": { + "$": { + "term": "cs.CV", + "scheme": "http://arxiv.org/schemas/atom" + } + } + } + }, + "arxiv_2002.09002": { + "path": [ + "rusthorn.pdf" + ], + "idType": "arxiv", + "tags": [], + "comments": "", + "text": "\n\nRustHorn: CHC-based Verification for Rust\nPrograms (full version)\n?\nYusuke Matsushita\n1\n, Takeshi Tsukada\n1\n, and Naoki Kobayashi\n1\nThe University of Tokyo, Tokyo, Japan\n{yskm24t,tsukada,koba}@is.s.u-tokyo.ac.jp\nAbstract.Reduction to the satisfiablility problem for constrained Horn\nclauses (CHCs) is a widely studied approach to automated program veri-\nfication. The current CHC-based methods for pointer-manipulating pro-\ngrams, however, are not very scalable. This paper proposes a novel trans-\nlation of pointer-manipulating Rust programs into CHCs, which clears\naway pointers and heaps by leveraging ownership. We formalize the trans-\nlation for a simplified core of Rust and prove its correctness. We have\nimplemented a prototype verifier for a subset of Rust and confirmed the\neffectiveness of our method.\n1 Introduction\nReduction toconstrained Horn clauses (CHCs)is a widely studied approach to\nautomated program verification [22,6]. A CHC is a Horn clause [30] equipped\nwith constraints, namely a formula of the formφ⇐=ψ\n0\n∧···∧ψ\nk−1\n, whereφ\nandψ\n0\n,...,ψ\nk−1\nare either an atomic formula of the formf(t\n0\n,...,t\nn−1\n) (fis\napredicate variableandt\n0\n,...,t\nn−1\nare terms), or a constraint (e.g.a < b+ 1).\n1\nWe call a finite set of CHCs aCHC systemor sometimes just CHC.CHC solving\nis an act of deciding whether a given CHC systemShas amodel, i.e. a valuation\nfor predicate variables that makes all the CHCs inSvalid. A variety of program\nverification problems can be naturally reduced to CHC solving.\nFor example, let us consider the following C code that defines McCarthy’s\n91 function.\nint mc91(int n) {\nif (n > 100) return n - 10; else return mc91(mc91(n + 11));\n}\nSuppose that we wish to provemc91(n) returns 91 whenevern≤101 (if it ter-\nminates). The wished property is equivalent to the satisfiability of the following\nCHCs, whereMc91(n,r) means thatmc91(n) returnsrif it terminates.\nMc91(n,r)⇐=n >100∧r=n−10\n?\nThis paper is the full version of [47].\n1\nFree variables are universally quantified. Terms and variables are governed under\nsorts (e.g.int,bool), which are made explicit in the formalization of§3.\narXiv:2002.09002v1 [cs.PL] 20 Feb 2020\n\n2Y. Matsushita et al.\nMc91(n,r)⇐=n≤100∧Mc91(n+ 11,res\n′\n)∧Mc91(res\n′\n,r)\nr= 91⇐=n≤101∧Mc91(n,r)\nThe property can be verified because this CHC system has a model:\nMc91(n,r) :⇐⇒r= 91∨(n >100∧r=n−10).\nA CHC solver provides a common infrastructure for a variety of programming\nlanguages and properties to be verified. There have been effective CHC solvers\n[40,18,29,12] that can solve instances obtained from actual programs\n2\nand many\nprogram verification tools [23,37,25,28,38,60] use a CHC solver as a backend.\nHowever, the current CHC-based methods do not scale very well for programs\nusingpointers, as we see in§1.1. We propose a novel method to tackle this\nproblem for pointer-manipulating programs underRust-style ownership, as we\nexplain in§1.2.\n1.1 Challenges in Verifying Pointer-Manipulating Programs\nThe standard CHC-based approach [23] for pointer-manipulating programs rep-\nresents the memory state as anarray, which is passed around as an argument\nof each predicate (cf. thestore-passing style), and a pointer as an index.\nFor example, a pointer-manipulating variation of the previous program\nvoid mc91p(int n, int* r) {\nif (n > 100) *r = n - 10;\nelse { int s; mc91p(n + 11, &s); mc91p(s, r); }\n}\nis translated into the following CHCs by the array-based approach:\n3\nMc91p(n,r,h,h\n′\n)⇐=n >100∧h\n′\n=h{r←n−10}\nMc91p(n,r,h,h\n′\n)⇐=n≤100∧Mc91p(n+ 11,s,h,h\n′′\n)\n∧Mc91p(h\n′′\n[s],r,h\n′′\n,h\n′\n)\nh\n′\n[r] = 91⇐=n≤101∧Mc91p(n,r,h,h\n′\n).\nMc91padditionally takes two arraysh,h\n′\nrepresenting the (heap) memory states\nbefore/after the call ofmc91p. The second argumentrofMc91p, which corre-\nsponds to the pointer argumentrin the original program, is an index for the\narrays. Hence, the assignment*r = n - 10is modeled in the first CHC as an\nupdate of ther-th element of the array. This CHC system has a model\nMc91p(n,r,h,h\n′\n) :⇐⇒h\n′\n[r] = 91∨(n >100∧h\n′\n[r] =n−10),\nwhich can be found by some array-supporting CHC solvers including Spacer [40],\nthanks to evolving SMT-solving techniques for arrays [62,10].\nHowever, the array-based approach has some shortcomings. Let us consider,\nfor example, the following innocent-looking code.\n4\n2\nFor example, the above CHC system onMc91can be solved instantly by many\nCHC solvers including Spacer [40] and HoIce [12].\n3\nh{r←v}is the array made fromhby replacing the value at indexrwithv.h[r] is\nthe value of arrayhat indexr.\n4\nrand()is a non-deterministic function that can return any integer value.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)3\nbool just_rec(int* ma) {\nif (rand() >= 0) return true;\nint old_a = *ma; int b = rand(); just_rec(&b);\nreturn (old_a == *ma);\n}\nIt can immediately returntrue; or it recursively calls itself and checks if the\ntarget ofmaremains unchanged through the recursive call. In effect this function\ndoes nothingon the allocated memory blocks, although it can possibly modify\nsome of the unused parts of the memory.\nSuppose we wish to verify thatjust_recnever returnsfalse. The standard\nCHC-based verifier for C, SeaHorn [23], generates a CHC system like below:\n56\nJustRec(ma,h,h\n′\n,r)⇐=h\n′\n=h∧r=true\nJustRec(ma,h,h\n′\n,r)⇐=mb6=ma∧h\n′′\n=h{mb←b}\n∧JustRec(mb,h\n′′\n,h\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec(ma,h,h\n′\n,r)\nUnfortunately the CHC system above isnotsatisfiable and thus SeaHorn issues\na false alarm. This is because, in this formulation,mbmay not necessarily be\ncompletely fresh; it is assumed to be different from the argumentmaof the\ncurrent call, but may coincide withmaof some deep ancestor calls.\n7\nThe simplest remedy would be to explicitly specify the way of memory allo-\ncation. For example, one can represent the memory state as a pair of an arrayh\nand an indexspindicating the maximum index that has been allocated so far.\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=h\n′\n=h∧sp\n′\n=sp∧r=true\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)⇐=mb=sp\n′′\n=sp+ 1∧h\n′′\n=h{mb←b}\nJustRec\n+\n(mb,h\n′′\n,sp\n′′\n,h\n′\n,sp\n′\n,r\n′\n)∧r= (h[ma] ==h\n′\n[ma])\nr=true⇐=JustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r)∧ma≤sp\nThe resulting CHC system now has a model, but it involves quantifiers:\nJustRec\n+\n(ma,h,sp,h\n′\n,sp\n′\n,r) :⇐⇒r=true∧ ∀i≤sp.h[i] =h\n′\n[i]\nFinding quantified invariants is known to be difficult in general despite ac-\ntive studies on it [41,2,36,26,19] and most current array-supporting CHC solvers\ngive up finding quantified invariants. In general, much more complex operations\non pointers can naturally take place, which makes the universally quantified in-\nvariants highly involved and hard to automatically find. To avoid complexity of\nmodels, CHC-based verification tools [23,24,37] tackle pointers by pointer anal-\nysis [61,43]. Although it does have some effects, the current applicable scope of\npointer analysis is quite limited.\n5\n==,!=,>=,&& denote binary operations that return boolean values.\n6\nWe omitted the allocation forold_afor simplicity.\n7\nPrecisely speaking, SeaHorn tends to even omit shallow address-freshness checks\nlikemb6=ma.\n\n4Y. Matsushita et al.\n1.2 Our Approach: Leverage Rust’s Ownership System\nThis paper proposes a novel approach to CHC-based verification of pointer-\nmanipulating programs, which makes use ofownershipinformation to avoid an\nexplicit representation of the memory.\nRust-style Ownership.Various styles ofownership/permission/capabilityhave\nbeen introduced to control and reason about usage of pointers on programming\nlanguage design, program analysis and verification [13,31,8,31,9,7,64,63]. In what\nfollows, we focus on the ownership in the style of the Rust programming language\n[46,55].\nRoughly speaking, the ownership system guarantees that, for each memory\ncell and at each point of program execution, either (i) only one alias has the\nupdate(write & read) permission to the cell, with any other alias havingno\npermission to it, or (ii) some (or no) aliases have thereadpermission to the cell,\nwith no alias having the update permission to it. In summary,when an alias\ncan read some data(with an update/read permission),any other alias cannot\nmodify the data.\nAs a running example, let us consider the program below, which follows\nRust’s ownership discipline (it is written in the C style; the Rust version is\npresented at Example 1):\nint* take_max(int* ma, int* mb) {\nif (*ma >= *mb) return ma; else return mb;\n}\nbool inc_max(int a, int b) {\n{\nint* mc = take_max(&a, &b);// borrow a and b\n*mc += 1;\n}// end of borrow\nreturn (a != b);\n}\nFigure 1 illustrates which alias has the update permission to the contents ofa\nandbduring the execution oftake_max(5,3).\nA notable feature isborrow. In the running example, when the pointers&a\nand&bare taken fortake_max, theupdate permissionsofaandbaretemporarily\ntransferredto the pointers. The original variables,aandb,lose the ability to\naccess their contentsuntil the end of borrow. The functiontake_maxreturns a\npointer having the update permission until the end of borrow, which justifies the\nupdate operation*mc += 1. In this example, the end of borrow is at the end of\nthe inner block ofinc_max. At this point,the permissions are given backto the\noriginal variablesaandb, allowing to computea != b. Note thatmccan point\ntoaand also toband that this choice is determineddynamically. The values of\naandbafter the borrowdepend on the behavior of the pointermc.\nThe end of each borrow is statically managed by alifetime. See§2 for a more\nprecise explanation of ownership, borrow and lifetimes.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)5\n56\n3 \ncall\ntake_max\nreturn\ntake_max\nend of\nborrowing\nma\na\nmc\nmb\nb\n(i)(ii)(iii)(iv)\nFig. 1.Values and aliases ofaandbin evaluatinginc_max(5,3). Each line shows\neach variable’s permission timeline: a solid line expresses the update permission and a\nbullet shows a point when the borrowed permission is given back. For example,bhas\nthe update permission to its content during (i) and (iv), but not during (ii) and (iii)\nbecause the pointermb, created at the call oftake_max,borrowsbuntil the end of (iii).\nKey Idea.The key idea of our method is torepresent a pointermaas a pair〈a,a\n◦\n〉\nof the current target valueaand the target valuea\n◦\nat the end of borrow.\n89\nThis\nrepresentation employsaccess to the future information(it is related toprophecy\nvariables; see§5). This simple idea turns out to be very powerful.\nIn our approach, the verification problem “Doesinc_maxalways returntrue?”\nis reduced to the satisfiability of the following CHCs:\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a≥b∧b\n◦\n=b∧r=〈a,a\n◦\n〉\nTakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a < b∧a\n◦\n=a∧r=〈b,b\n◦\n〉\nIncMax(a,b,r)⇐=TakeMax(〈a,a\n◦\n〉,〈b,b\n◦\n〉,〈c,c\n◦\n〉)∧c\n′\n=c+ 1\n∧c\n◦\n=c\n′\n∧r= (a\n◦\n!=b\n◦\n)\nr=true⇐=IncMax(a,b,r).\nThe mutable referencemais now represented as〈a,a\n◦\n〉, and similarly formband\nmc. The first CHC models the then-clause oftake_max: the return value isma,\nwhich is expressed asr=〈a,a\n◦\n〉; in contrast,mbis released, whichconstrains\nb\n◦\n, the value ofbat the end of borrow, to the current valueb. In the clause on\nIncMax,mcis represented as a pair〈c,c\n◦\n〉. The constraintc\n′\n=c+ 1∧c\n◦\n=c\n′\nmodels the increment ofmc(in the phase (iii) in Fig. 1). Importantly, the final\nchecka != bis simply expressed asa\n◦\n!=b\n◦\n; the updated values ofa/bare\navailable asa\n◦\n/b\n◦\n. Clearly, the CHC system above has a simple model.\nAlso, thejust_recexample in§1.1 can be encoded as a CHC system\nJustRec(〈a,a\n◦\n〉,r)⇐=a\n◦\n=a∧r=true\nJustRec(〈a,a\n◦\n〉,r)⇐=mb=〈b,b\n◦\n〉 ∧JustRec(mb,r\n′\n)\n∧a\n◦\n=a∧r= (a==a\n0\n)\n8\nPrecisely, this is the representation of a pointer with a borrowed update permission\n(i.e.mutable reference). Other cases are discussed in§3.\n9\nFor example, in the case of Fig. 1, whentake_maxis called, the pointermais〈5,6〉\nandmbis〈3,3〉.\n\n6Y. Matsushita et al.\nr=true⇐=JustRec(〈a,a\n◦\n〉,r).\nNow it has a simple model:JustRec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a\n◦\n=a. Re-\nmarkably, arrays and quantified formulas are not required to express the model,\nwhich allows the CHC system to be easily solved by many CHC solvers. More\nadvanced examples are presented in§3.4, including one with destructive update\non a singly-linked list.\nContributions.Based on the above idea, we formalize the translation from pro-\ngrams to CHC systems for a core language of Rust, prove correctness (both\nsoundness and completeness) of the translation, and confirm the effectiveness\nof our approach through preliminary experiments. The core language supports,\namong others, recursive types. Remarkably, our approach enables us to automat-\nically verify some properties of a program with destructive updates on recursive\ndata types such as lists and trees.\nThe rest of the paper is structured as follows. In§2, we provide a formalized\ncore language of Rust supporting recursions, lifetime-based ownership and recur-\nsive types. In§3, we formalize our translation from programs to CHCs and prove\nits correctness. In§4, we report on the implementation and the experimental\nresults. In§5 we discuss related work and in§6 we conclude the paper.\n2 Core Language: Calculus of Ownership and Reference\nWe formalize a core of Rust asCalculus of Ownership and Reference (COR),\nwhose design has been affected by the safe layer ofλ\nRust\nin the RustBelt paper\n[32]. It is a typed procedural language with a Rust-like ownership system.\n2.1 Syntax\nThe following is the syntax of COR.\n(program)Π::=F\n0\n···F\nn−1\n(function definition)F::=fnf Σ{L\n0\n:S\n0\n···L\nn−1\n:S\nn−1\n}\n(function signature)Σ::=〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉\n(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U\n(statement)S::=I;gotoL|returnx\n|match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}\n(instruction)I::=lety=mutbor\nα\nx|dropx|immutx|swap(∗x,∗y)\n|let∗y=x|lety=∗x|let∗y=copy∗x|xasT\n|lety=f〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n)\n|introα|nowα|α≤β\n|let∗y=const|let∗y=∗xop∗x\n′\n|let∗y=rand()\n|let∗y=inj\nT\n0\n+T\n1\ni\n∗x|let∗y= (∗x\n0\n,∗x\n1\n)|let(∗y\n0\n,∗y\n1\n) =∗x\n(type)T,U::=X|μX.T|P T|T\n0\n+T\n1\n|T\n0\n×T\n1\n|int|unit\n(pointer kind)P::=own|R\nα\n(reference kind)R::=mut|immut\n\nRustHorn: CHC-based Verification for Rust Programs (full version)7\nα,β,γ::= (lifetime variable)X,Y::= (type variable)\nx,y::= (variable)f,g::= (function name)L::= (label)\nconst::=n|()bool:=unit+unitop::=op\nint\n|op\nbool\nop\nint\n::= +|−|···op\nbool\n::=>=|==|!=|···\nProgram, Function and Label.A program (denoted byΠ) is a set of function\ndefinitions. A function definition (F) consists of a function name, a function\nsignature and a set of labeled statements (L:S). In COR, for simplicity, the\ninput/output types of a function are restricted topointer types. A function is\nparametrized over lifetime parameters under constraints; polymorphism on types\nis not supported for simplicity, just asλ\nRust\n. For the lifetime parameter receiver,\noften〈α\n0\n,···|〉is abbreviated to〈α\n0\n,...〉and〈|〉is omitted.\nA label (L) is an abstract program point to be jumped to bygoto.\n10\nEach\nlabel is assigned awhole contextby the type system, as we see later. This style,\nwith unstructured control flows, helps the formal description of CHCs in§3.2. A\nfunction should have the labelentry(entry point), and every label in a function\nshould be syntactically reachable fromentrybygotojumps.\n11\nStatement and Instruction.A statement (S) performs an instruction with a jump\n(I;gotoL), returns from a function (returnx), or branches (match∗x{···}).\nAn instruction (I) performs an elementary operation: mutable (re)borrow\n(lety=mutbor\nα\nx), releasing a variable (dropx), weakening ownership (immut\nx),\n12\nswap (swap(∗x,∗y)), creating/dereferencing a pointer (let∗y=x,lety=\n∗x), copy (let∗y=copy∗x),\n13\ntype weakening (xasT), function call (lety=\nf〈···〉(···)), lifetime-related ghost operations (introα,nowα, α≤β; explained\nlater), getting a constant / operation result / random integer (let∗y=const/\n∗xop∗x\n′\n/rand()), creating a variant (let∗y=inj\nT\n0\n+T\n1\ni\n∗x), and creating/destruct-\ning a pair (let∗y= (∗x\n0\n,∗x\n1\n),let(∗y\n0\n,∗y\n1\n) =∗x). An instruction of form\nlet∗y=···implicitly allocates new memory cells asy; also, some instruc-\ntions deallocate memory cells implicitly. For simplicity, every variable is de-\nsigned to be apointerand everyrelease of a variableshould be explicitly an-\nnotated by ‘dropx’. In addition, we provide swap instead of assignment; the\nusual assignment (of copyable data from∗xto∗y) can be expressed bylet∗x\n′\n=\ncopy∗x;swap(∗y,∗x\n′\n);dropx\n′\n.\nType.As a type (T), we support recursive types (μX.T), pointer types (P T),\nvariant types (T\n0\n+T\n1\n), pair types (T\n0\n×T\n1\n) and basic types (int,unit).\nA pointer typeP Tcan be anowning pointerownT(Boxin Rust),muta-\nble referencemut\nα\nT(&'a mut T) orimmutable referenceimmut\nα\nT(&'a T). An\n10\nIt is related to acontinuationintroduced byletcontinλ\nRust\n.\n11\nHere ‘syntactically’ means that detailed information such that a branch condition\nonmatchor non-termination is ignored.\n12\nThis instruction turns a mutable reference to an immutable reference. Using this,\nan immutable borrow fromxtoycan be expressed bylety=mutbor\nα\nx;immuty.\n13\nCopying a pointer (an immutable reference)xtoycan be expressed bylet∗ox=\nx;let∗oy=copy∗ox;lety=∗oy.\n\n8Y. Matsushita et al.\nowning pointerhas data in the heap memory, can freely update the data (un-\nless it is borrowed), and has the obligation to clean up the data from the heap\nmemory. In contrast, amutable/immutable reference(orunique/shared refer-\nence) borrows an update/read permission from an owning pointer or another\nreference with the deadline of alifetimeα(introduced later). A mutable ref-\nerence cannot be copied, while an immutable reference can be freely copied. A\nreference loses the permission at the time when it is released.\n14\nA typeTthat appears in a program (not just as a substructure of some type)\nshould satisfy the following condition (if it holds we say the type iscomplete):\nevery type variableXinTis bound by someμand guarded by a pointer con-\nstructor (i.e. given a binding of formμX.U, every occurrence ofXinUis a part\nof a pointer type, of formP U\n′\n).\nLifetime.Alifetimeis anabstract time point in the process of computation,\n15\nwhich is statically managed bylifetime variablesα. A lifetime variable can be a\nlifetime parameterthat a function takes or alocal lifetime variableintroduced\nwithin a function. We have three lifetime-related ghost instructions:introαin-\ntroduces a new local lifetime variable,nowαsets a local lifetime variable to\nthe current moment and eliminates it, andα≤βasserts the ordering on local\nlifetime variables.\nExpressivity and Limitations.COR can express most borrow patterns in the\ncore of Rust. The set of moments when a borrow is active forms a continuous\ntime range, even undernon-lexical lifetimes[54].\n16\nA major limitation of COR is that it does not supportunsafe code blocksand\nalso lackstype traits and closures. Still, our idea can be combined with unsafe\ncode and closures, as discussed in§3.5. Another limitation of COR is that, unlike\nRust andλ\nRust\n, wecannot directly modify/borrow a fragment of a variable(e.g.\nan element of a pair). Still, we can eventually modify/borrow a fragment by\nborrowing the whole variable andsplitting pointers(e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’).\nThis borrow-and-split strategy, nevertheless, yields a subtle obstacle when we\nextend the calculus for advanced data types (e.g.get_defaultin ‘Problem Case\n#3’ from [54]). For future work, we pursue a more expressive calculus modeling\nRust and extend our verification method to it.\nExample 1 (COR Program).The following program expresses the functionstake_max\nandinc_maxpresented in§1.2. We shorthand sequential executions by ‘;\nL\n’ (e.g.\n14\nIn Rust, even after a reference loses the permission and the lifetime ends, its address\ndata can linger in the memory, although dereferencing on the reference is no longer\nallowed. We simplify the behavior of lifetimes in COR.\n15\nIn the terminology of Rust, a lifetime often means a time range where a borrow is\nactive. To simplify the discussions, however, we in this paper use the term lifetime\nto refer to atime point when a borrow ends.\n16\nStrictly speaking, this property is broken by recently adopted implicit two-phase\nborrows [59,53]. However, by shallow syntactical reordering, a program with implicit\ntwo-phase borrows can be fit into usual borrow patterns.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)9\nL\n0\n:I\n0\n;\nL\n1\nI\n1\n;gotoL\n2\nstands forL\n0\n:I\n0\n;gotoL\n1\nL\n1\n:I\n1\n;gotoL\n2\n).\n17\nfn take-max〈α〉(ma:mut\nα\nint,mb:mut\nα\nint)→mut\nα\nint{\nentry:let∗ord=∗ma>=∗mb;\nL1\nmatch∗ord{inj\n1\n∗ou→goto L2,inj\n0\n∗ou→goto L5}\nL2:dropou;\nL3\ndropmb;\nL4\nreturnmaL5:dropou;\nL6\ndropma;\nL7\nreturnmb\n}\nfn inc-max(oa:own int,ob:own int)→own bool{\nentry:introα;\nL1\nletma=mutbor\nα\noa;\nL2\nletmb=mutbor\nα\nob;\nL3\nletmc=take-max〈α〉(ma,mb);\nL4\nlet∗o1= 1;\nL5\nlet∗oc\n′\n=∗mc+∗o1;\nL6\ndropo1;\nL7\nswap(mc,oc\n′\n);\nL8\ndropoc\n′\n;\nL9\ndropmc;\nL10\nnowα;\nL11\nlet∗or=∗oa!=∗ob;\nL12\ndropoa;\nL13\ndropob;\nL14\nreturnor\n}\nIntake-max, conditional branching is performed bymatchand itsgotodirections\n(atL1). Ininc-max, increment on the mutable referencemcis performed by\ncalculating the new value (atL4,L5) and updating the data by swap (atL7).\nThe following is the corresponding Rust program, with ghost annotations\n(marked italic and dark green, e.g.drop ma) on lifetimes and releases of mutable\nreferences.\nfn take_max<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif *ma >= *mb {drop mb;ma } else {drop ma;mb }\n}\nfn inc_max(mut a: i32, mut b: i32) -> bool {\n{intro 'a;\nlet mc = take_max<'a>(&'amut a, &'amut b); *mc += 1;\ndrop mc; now 'a;}\na != b\n}\n2.2 Type System\nThe type system of COR assigns to each label awhole context(Γ,A). We define\nbelow the whole context and the typing judgments.\nContext.Avariable contextΓis a finite set of items of formx:\na\nT, whereT\nshould be a completepointertype anda(which we callactiveness) is of form\n‘active’ or ‘†α’ (frozenuntil lifetimeα). We abbreviatex:\nactive\nTasx:T. A\nvariable context should not contain two items on the same variable. Alifetime\ncontextA= (A,R) is a finite preordered set of lifetime variables, whereAis the\nunderlying set andRis the preorder. We write|A|and≤\nA\nto refer toAandR.\nFinally, awhole context(Γ,A) is a pair of a variable contextΓand a lifetime\ncontextAsuch that every lifetime variable inΓis contained inA.\n17\nThe first character of each variable indicates the pointer kind (o/mcorresponds to\nown/mut\nα\n). We swap the branches of thematchstatement intake-max, to fit the\norder to C/Rust’sif.\n\n10Y. Matsushita et al.\nNotations.The set operationA+B(or more generally\n∑\nλ\nA\nλ\n) denotes the\ndisjoint union, i.e. the union defined only if the arguments are disjoint. The set\noperationA−Bdenotes the set difference defined only ifA⊇B. For a natural\nnumbern, [n] denotes the set{0,...,n−1}.\nGenerally, an auxiliary definition for a rule can be presented just below,\npossibly in a dotted box.\nProgram and Function.The rules for typing programs and functions are pre-\nsented below. They assign to each label a whole context (Γ,A). ‘S:\nΠ,f\n(Γ,A)|\n(Γ\nL\n,A\nL\n)\nL\n|U’ is explained later.\nfor anyFinΠ, F:\nΠ\n(Γ\nname(F),L\n,A\nname(F),L\n)\nL∈Label\nF\nΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nname(F): the function name ofFLabel\nF\n: the set of labels inF\nFnLabel\nΠ\n: the set of pairs (f,L) such that a functionfinΠhas a labelL\nF=fnf〈α\n0\n,...,α\nm−1\n|α\na\n0\n≤α\nb\n0\n,...,α\na\nl−1\n≤α\nb\nl−1\n〉(x\n0\n:T\n0\n,...,x\nn−1\n:T\nn−1\n)→U{···}\nΓ\nentry\n={x\ni\n:T\ni\n|i∈[n]}A={α\nj\n|j∈[m]}A\nentry\n=\n(\nA,\n(\nId\nA\n∪{(α\na\nk\n,α\nb\nk\n)|k∈[l]}\n)\n+\n)\nfor anyL\n′\n:S∈LabelStmt\nF\n, S:\nΠ,f\n(Γ\nL\n′\n,A\nL\n′\n)|(Γ\nL\n,A\nL\n)\nL∈Label\nF\n|U\nF:\nΠ\n(Γ\nL\n,A\nL\n)\nL∈Label\nF\nLabelStmt\nF\n: the set of labeled statements inF\nId\nA\n: the identity relation onA R\n+\n: the transitive closure ofR\nOn the rule for the function, the initial whole context atentryis specified\n(the second and third preconditions) and also the contexts for other labels are\nchecked (the fourth precondition). The context for each label (in each function)\ncan actually be determined in the order by the distance in the number ofgoto\njumps fromentry, but that order is not very obvious because ofunstructured\ncontrol flows.\nStatement.‘S:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U’ means that running the statementS\n(underΠ,f) with the whole context (Γ,A) results in a jump to a label with the\nwhole contexts specified by (Γ\nL\n,A\nL\n)\nL\nor a return of data of typeU. Its rules\nare presented below. ‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ is explained later.\nI:\nΠ,f\n(Γ,A)→(Γ\nL\n0\n,A\nL\n0\n)\nI;gotoL\n0\n:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nΓ={x:U} |A|=A\nexΠ,f\nreturnx:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nA\nexΠ,f\n: the set of lifetime parameters offinΠ\nx:P(T\n0\n+T\n1\n)∈Γ\nfori= 0,1,(Γ\nL\ni\n,A\nL\ni\n) = (Γ−{x:P(T\n0\n+T\n1\n)}+{y\ni\n:P T\ni\n},A)\nmatch∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}:\nΠ,f\n(Γ,A)|(Γ\nL\n,A\nL\n)\nL\n|U\nThe rule for thereturnstatement ensures that there remain no extra variables\nand local lifetime variables.\nInstruction.‘I:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n)’ means that running the instructionI(un-\nderΠ,f) updates the whole context (Γ,A) into (Γ\n′\n,A\n′\n). The rules are designed\nso that, for anyI,Π,f, (Γ,A), there exists at most one (Γ\n′\n,A\n′\n) such that\n\nRustHorn: CHC-based Verification for Rust Programs (full version)11\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n) holds. Below we present some of the rules; the complete\nrules are presented in Appendix A.1. The following is the typing rule for mutable\n(re)borrow.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nLifetime\nT\n: the set of lifetime variables occurring inT\nAfter you mutably (re)borrow an owning pointer / mutable referencexuntilα,x\nisfrozenuntilα. Here,αshould be a local lifetime variable\n18\n(the first precondi-\ntion) that does not live longer than the data ofx(the third precondition). Below\nare the typing rules for local lifetime variable introduction and elimination.\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nOnintroα, it just ensures the new local lifetime variable to be earlier than\nany lifetime parameters (which are given by exterior functions). Onnowα, the\nvariables frozen withαget active again. Below is the typing rule for dereference\nof a pointer to a pointer, which may be a bit interesting.\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nThe third precondition of the typing rule formutborjustifies taking justαin\nthe rule ‘R\nα\n◦R\n′\nβ\n:=R\n′′\nα\n’.\nLet us interpretΠ: (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\nas “the programΠhas the\ntype (Γ\nf,L\n,A\nf,L\n)\n(f,L)∈FnLabel\nΠ\n”. The type system ensures that any program\nhas at most one type (which may be a bit unclear because of unstructured\ncontrol flows). Hereinafter, we implicitly assume that a program has a type.\n2.3 Concrete Operational Semantics\nWe introduce for CORconcrete operational semantics, which handles a concrete\nmodel of the heap memory.\nThe basic item,concrete configurationC, is defined as follows.\nS::= end\n∣\n∣\n[f,L]x,F;S(concrete configuration)C::= [f,L]F;S|H\nHere,His aheap, which maps addresses (represented by integers) to integers\n(data).Fis aconcrete stack frame, which maps variables to addresses. The stack\n18\nIn COR, a reference that lives after the return from the function should be cre-\nated by splitting a reference (e.g. ‘let(∗y\n0\n,∗y\n1\n) =∗x’) given in the inputs; see also\nExpressivity and Limitations.\n\n12Y. Matsushita et al.\npart ofCis of form ‘[f,L]F; [f\n′\n,L\n′\n]x,F\n′\n;···; end’ (we may omit the terminator\n‘; end’). [f,L] on each stack frame indicates the program point. ‘x,’ on each non-\ntop stack frame is the receiver of the value returned by the function call.\nConcrete operational semantics is characterized by the one-step transition\nrelationC→\nΠ\nC\n′\nand the termination relation final\nΠ\n(C), which can be de-\nfined straightforwardly. Below we show the rules for mutable (re)borrow, swap,\nfunction call and return from a function; the complete rules and an example\nexecution are presented in Appendix A.2.S\nΠ,f,L\nis the statement for the label\nLof the functionfinΠ. Ty\nΠ,f,L\n(x) is the type of variablexat the label.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nHere we introduce ‘#T’, which represents how many memory cells the typeT\ntakes (at the outermost level). #Tis defined for everycompletetypeT, because\nevery occurrence of type variables in a complete type is guarded by a pointer\nconstructor.\n#(T\n0\n+T\n1\n) := 1 + max{#T\n0\n,#T\n1\n}#(T\n0\n×T\n1\n) := #T\n0\n+ #T\n1\n#μX.T:= #T[μX.T/X] #int= #P T:= 1 #unit= 0\n3 CHC Representation of COR Programs\nTo formalize the idea discussed in§1, we give a translation from COR programs\nto CHC systems, which precisely characterize the input-output relations of the\nCOR programs. We first define the logic for CHCs (§3.1). We then formally\ndescribe our translation (§3.2) and prove its correctness (§3.3). Also, we examine\neffectiveness of our approach with advanced examples (§3.4) and discuss how\nour idea can be extended and enhanced (§3.5).\n3.1 Multi-sorted Logic for Describing CHCs\nTo begin with, we introduce a first-order multi-sorted logic for describing the\nCHC representation of COR programs.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)13\nSyntax.The syntax is defined as follows.\n(CHC)Φ::=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n>:= the nullary conjunction of formulas\n(formula)φ,ψ::=f(t\n0\n,...,t\nn−1\n) (elementary formula) ˇφ::=f(p\n0\n,...,p\nn−1\n)\n(term)t::=x| 〈t〉 | 〈t\n∗\n,t\n◦\n〉 |inj\ni\nt|(t\n0\n,t\n1\n)| ∗t| ◦t|t.i|const|topt\n′\n(value)v,w::=〈v〉 | 〈v\n∗\n,v\n◦\n〉 |inj\ni\nv|(v\n0\n,v\n1\n)|const\n(pattern)p,q::=x| 〈p〉 | 〈p\n∗\n,p\n◦\n〉 |inj\ni\np|(p\n0\n,p\n1\n)|const\n(sort)σ,τ::=X|μX.σ|C σ|σ\n0\n+σ\n1\n|σ\n0\n×σ\n1\n|int|unit\n(container kind)C::=box|mutconst::= same as CORop::= same as COR\nbool:=unit+unit true:=inj\n1\n()false:=inj\n0\n()\nX::= (sort variable)x,y::= (variable)f::= (predicate variable)\nWe introduceboxσandmutσ, which correspond toownT/immut\nα\nTand\nmut\nα\nTrespectively.〈t〉/〈t\n∗\n,t\n◦\n〉is the constructor forboxσ/mutσ.∗ttakes the\nbody/first value of〈−〉/〈−,−〉and◦ttakes the second value of〈−,−〉. We restrict\nthe form of CHCs here to simplify the proofs later. Although the logic does not\nhave a primitive for equality, we can define the equality in a CHC system (e.g.\nby adding∀x:σ.Eq(x,x)⇐=>).\nACHC system(Φ,Ξ) is a pair of a finite set of CHCsΦ={Φ\n0\n,...,Φ\nn−1\n}\nandΞ, whereΞis a finite map from predicate variables to tuples of sorts (denoted\nbyΞ), specifying the sorts of the input values. Unlike the informal description\nin§1, we addΞto a CHC system.\nSort System.‘t:\n∆\nσ’ (the termthas the sortσunder∆) is defined as follows.\nHere,∆is a finite map from variables to sorts.σ∼τis the congruence on sorts\ninduced byμX.σ∼σ[μX.σ/X].\n∆(x) =σ\nx:\n∆\nσ\nt:\n∆\nσ\n〈t〉:\n∆\nboxσ\nt\n∗\n,t\n◦\n:\n∆\nσ\n〈t\n∗\n,t\n◦\n〉:\n∆\nmutσ\nt:\n∆\nσ\ni\ninj\ni\nt:\n∆\nσ\n0\n+σ\n1\nt\n0\n:\n∆\nσ\n0\nt\n1\n:\n∆\nσ\n1\n(t\n0\n,t\n1\n):\n∆\nσ\n0\n×σ\n1\nt:\n∆\nC σ\n∗t:\n∆\nσ\nt:\n∆\nmutσ\n◦t:\n∆\nσ\nt:\n∆\nσ\n0\n+σ\n1\nt.i:\n∆\nσ\ni\nconst:\n∆\nσ\nconst\nt,t\n′\n:\n∆\nint\ntopt\n′\n:\n∆\nσ\nop\nt:\n∆\nσ σ∼τ\nt:\n∆\nτ\nσ\nconst\n: the sort ofconstσ\nop\n: the output sort ofop\n‘wellSorted\n∆,Ξ\n(φ)’ and ‘wellSorted\nΞ\n(Φ)’, the judgments on well-sortedness\nof formulas and CHCs, are defined as follows.\nΞ(f) = (σ\n0\n,...,σ\nn−1\n) for anyi∈[n], t\ni\n:\n∆\nσ\ni\nwellSorted\n∆,Ξ\n(f(t\n0\n,...,t\nn−1\n))\n∆={(x\ni\n,σ\ni\n)|i∈[m]}wellSorted\n∆,Ξ\n( ˇφ) for anyj∈[n],wellSorted\n∆,Ξ\n(ψ\nj\n)\nwellSorted\nΞ\n(\n∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\n)\nThe CHC system (Φ,Ξ) is said to be well-sorted if wellSorted\nΞ\n(Φ) holds for any\nΦ∈Φ.\nSemantics.‘[[t]]\nI\n’, the interpretation of the termtas a value underI, is defined\nas follows. Here,Iis a finite map from variables to values. Although the definition\n\n14Y. Matsushita et al.\nis partial, the interpretation is defined for all well-sorted terms.\n[[x]]\nI\n:=I(x) [[〈t〉]]\nI\n:=〈[[t]]\nI\n〉[[〈t\n∗\n,t\n◦\n〉]]\nI\n:=〈[[t\n∗\n]]\nI\n,[[t\n◦\n]]\nI\n〉[[inj\ni\nt]]\nI\n:=inj\ni\n[[t]]\nI\n[[(t\n0\n,t\n1\n)]]\nI\n:= ([[t\n0\n]]\nI\n,[[t\n1\n]]\nI\n) [[∗t]]\nI\n:=\n{\nv([[t]]\nI\n=〈v〉)\nv\n∗\n([[t]]\nI\n=〈v\n∗\n,v\n◦\n〉)\n[[◦t]]\nI\n:=v\n◦\nif [[t]]\nI\n=〈v\n∗\n,v\n◦\n〉\n[[t.i]]\nI\n:=v\ni\nif [[t]]\nI\n= (v\n0\n,v\n1\n) [[const]]\nI\n:=const[[topt\n′\n]]\nI\n:= [[t]]\nI\n[[op]][[t\n′\n]]\nI\n[[op]]: the binary operation on values corresponding toop\nApredicate structureMis a finite map from predicate variables to (concrete)\npredicates on values.M,I|=f(t\n0\n,...,t\nn−1\n) means thatM(f)([[t\n0\n]]\nI\n,...,[[t\nm−1\n]]\nI\n)\nholds.M|=Φis defined as follows.\nfor anyIs.t.∀i∈[m].I(x\ni\n):\n∅\nσ\ni\n,M,I|=ψ\n0\n,...,ψ\nn−1\nimpliesM,I|= ˇφ\nM|=∀x\n0\n:σ\n0\n,...,x\nm−1\n:σ\nm−1\n.ˇφ⇐=ψ\n0\n∧ ··· ∧ψ\nn−1\nFinally,M|= (Φ,Ξ) is defined as follows.\nfor any (f,(σ\n0\n,...,σ\nn−1\n))∈Ξ,M(f) is a predicate on values of sortσ\n0\n,...,σ\nn−1\ndomM= domΞfor anyΦ∈Φ,M|=Φ\nM|= (Φ,Ξ)\nWhenM|= (Φ,Ξ) holds, we say thatMis amodelof (Φ,Ξ). Every well-\nsorted CHC system (Φ,Ξ) has theleast modelon the point-wise ordering (which\ncan be proved based on the discussions in [16]), which we write asM\nleast\n(Φ,Ξ)\n.\n3.2 Translation from COR Programs to CHCs\nNow we formalize our translation of Rust programs into CHCs. We define (|Π|),\nwhich is a CHC system that represents the input-output relations of the functions\nin the COR programΠ.\nRoughly speaking, the least modelM\nleast\n(|Π|)\nfor this CHC system should sat-\nisfy: for any valuesv\n0\n,...,v\nn−1\n,w,M\nleast\n(|Π|)\n|=f\nentry\n(v\n0\n,...,v\nn−1\n,w) holds exactly\nif, in COR, a function callf(v\n0\n,...,v\nn−1\n) can returnw. Actually, in concrete\noperational semantics, such values should be read out from the heap memory.\nThe formal description and proof of this expected property is presented in§3.3.\nAuxiliary Definitions.The sort corresponding to the typeT, (|T|), is defined\nas follows.\nˇ\nPis a meta-variable for a non-mutable-reference pointer kind, i.e.\nownorimmut\nα\n. Note that the information on lifetimes is all stripped off.\n(|X|) :=X(|μX.T|) =μX.(|T|) (|\nˇ\nP T|) :=box(|T|) (|mut\nα\nT|) :=mut(|T|)\n(|int|) :=int(|unit|) :=unit(|T\n0\n+T\n1\n|) := (|T\n0\n|) + (|T\n1\n|) (|T\n0\n×T\n1\n|) := (|T\n0\n|)×(|T\n1\n|)\nWe introduce a special variableresto represent the result of a function.\n19\nFor\na labelLin a functionfin a programΠ, we define ˇφ\nΠ,f,L\n,Ξ\nΠ,f,L\nand∆\nΠ,f,L\n19\nFor simplicity, we assume that the parameters of each function are sorted respecting\nsome fixed orderon variables (withrescoming at the last), and we enumerate various\nitems in this fixed order.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)15\nas follows, if the items in the variable context for the label are enumerated as\nx\n0\n:\na\n0\nT\n0\n,...,x\nn−1\n:\na\nn−1\nT\nn−1\nand the return type of the function isU.\nˇφ\nΠ,f,L\n:=f\nL\n(x\n0\n,...,x\nn−1\n,res)Ξ\nΠ,f,L\n:= ((|T\n0\n|),...,(|T\nn−1\n|),(|U|))\n∆\nΠ,f,L\n:={(x\ni\n,(|T\ni\n|))|i∈[n]}+{(res,(|U|))}\n∀(∆) stands for∀x\n0\n:σ\n0\n, ..., x\nn−1\n:σ\nn−1\n, where the items in∆are enumerated\nas (x\n0\n,σ\n0\n),...,(x\nn−1\n,σ\nn−1\n).\nCHC Representation.Now we introduce ‘(|L:S|)\nΠ,f\n’, the set (in most cases,\nsingleton) of CHCs modeling the computation performed by the labeled state-\nmentL:SinffromΠ. Unlike informal descriptions in§1, we turn topattern\nmatchinginstead of equations, to simplify the proofs in Appendix C.3. Below\nwe show some of the rules; the complete rules are presented in Appendix B. The\nvariables marked green (e.g.x\n◦\n) should be fresh. The following is the rule for\nmutable (re)borrow.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nThe value at the end of borrow is represented as a newly introduced variablex\n◦\n.\nBelow is the rule for release of a variable.\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nWhen a variablexof typemut\nα\nTis dropped/released, we check the prophesied\nvalue at the end of borrow. Below is the rule for a function call.\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\nThe body (the right-hand side of⇐= ) of the CHC contains two formulas, which\nyields a kind of call stack at the level of CHCs. Below is the rule for a return\nfrom a function.\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\nThe variableresis forced to be equal to the returned variablex.\nFinally, (|Π|), the CHC system that represents the COR programΠ(or the\nCHC representationofΠ), is defined as follows.\n(|Π|) :=\n(\n∑\nFinΠ,L:S∈LabelStmt\nF\n(|L:S|)\nΠ,name\nF\n,(Ξ\nΠ,f,L\n)\nf\nL\ns.t. (f,L)∈FnLabel\nΠ\n)\nExample 2 (CHC Representation).We present below the CHC representation\noftake-maxdescribed in§2.1. We omit CHCs oninc-maxhere. We have also\n\n16Y. Matsushita et al.\nexcluded the variable binders ‘∀ ···’.\n20\ntake-max\nentry\n(ma,mb,res)⇐=take-max\nL1\n(ma,mb,〈∗ma>=∗mb〉,res)\ntake-max\nL1\n(ma,mb,〈inj\n1\n∗ou〉,res)⇐=take-max\nL2\n(ma,mb,ou,res)\ntake-max\nL1\n(ma,mb,〈inj\n0\n∗ou〉,res)⇐=take-max\nL5\n(ma,mb,ou,res)\ntake-max\nL2\n(ma,mb,ou,res)⇐=take-max\nL3\n(ma,mb,res)\ntake-max\nL3\n(ma,〈mb\n∗\n,mb\n∗\n〉,res)⇐=take-max\nL4\n(ma,res)\ntake-max\nL4\n(ma,ma)⇐=>\ntake-max\nL5\n(ma,mb,ou,res)⇐=take-max\nL6\n(ma,mb,res)\ntake-max\nL6\n(〈ma\n∗\n,ma\n∗\n〉,mb,res)⇐=take-max\nL7\n(mb,res)\ntake-max\nL7\n(mb,mb)⇐=>\nThe fifth and eighth CHC represent release ofmb/ma. The sixth and ninth CHC\nrepresent the determination of the return valueres.\n3.3 Correctness of the CHC Representation\nNow we formally state and prove the correctness of the CHC representation.\nNotations.We use{|···|}(instead of{···}) for the intensional description of\na multiset.A⊕B(or more generally\n⊕\nλ\nA\nλ\n) denotes the multiset sum (e.g.\n{|0,1|}⊕{|1|}={|0,1,1|}6={|0,1|}).\nReadout and Safe Readout.We introduce a few judgments to formally de-\nscribe how read out data from the heap.\nFirst, the judgment ‘readout\nH\n(∗a::T|v;M)’ (the data at the addressaof\ntypeTcan be read out from the heapHas the valuev, yielding the memory\nfootprintM) is defined as follows.\n21\nHere, amemory footprintMis a finite\nmultiset of addresses, which is employed for monitoring the memory usage.\nH(a) =a\n′\nreadout\nH\n(∗a\n′\n::T|v;M)\nreadout\nH\n(∗a:ownT|〈v〉;M⊕{|a|})\nreadout\nH\n(∗a::T[μX.T/X]|v;M)\nreadout\nH\n(∗a::μX.T/X|v;M)\nH(a) =n\nreadout\nH\n(∗a::int|n;{|a|})\nreadout\nH\n(∗a::unit|();∅)\nH(a) =i∈[2] for anyk∈[(#T\n1−i\n−#T\ni\n)\n≥0\n],H(a+1+#T\ni\n+k) = 0\nreadout\nH\n(∗(a+1) ::T\ni\n|v;M)\nreadout\nH\n(\n∗a::T\n0\n+T\n1\n|inj\ni\nv;M⊕{|a|}⊕{|a+1+#T\ni\n+k|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]|}\n)\n(n)\n≥0\n:= max{n,0}\nreadout\nH\n(\n∗a::T\n0\n|v\n0\n;M\n0\n)\nreadout\nH\n(\n∗(a+#T\n0\n) ::T\n1\n|v\n1\n;M\n1\n)\nreadout\nH\n(\n∗a::T\n0\n×T\n1\n|(v\n0\n,v\n1\n);M\n0\n⊕M\n1\n)\n20\nThesortsofthevariablesareasfollows:\nma,mb,res:mut int;ma\n∗\n,mb\n∗\n:int;ou:box unit.\n21\nHere we can ignore mutable/immutable references, because we focus on what we\ncallsimplefunctions, as explained later.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)17\nFor example, ‘readout\n{(100,7),(101,5)}\n(∗100 ::int×int|(7,5);{|100,101|})’ holds.\nNext, ‘readout\nH\n(F::Γ| F;M)’ (the data of the stack frameFrespecting\nthe variable contextΓcan be read out fromHasF, yieldingM) is defined as\nfollows. domΓstands for{x|x:\na\nT∈Γ}.\ndomF= domΓfor anyx:ownT∈Γ,readout\nH\n(∗F(x) ::T|v\nx\n;M\nx\n)\nreadout\nH\n(F::Γ|{(x,〈v\nx\n〉)|x∈domF};\n⊕\nx∈domF\nM\nx\n)\nFinally, ‘safe\nH\n(F::Γ| F)’ (the data ofFrespectingΓcan besafelyread\nout fromHasF) is defined as follows.\nreadout\nH\n(F::Γ|F;M)Mhas no duplicate items\nsafe\nH\n(F::Γ|F)\nHere, the ‘no duplicate items’ precondition checks the safety on the ownership.\nCOS-based Model.Now we introduce theCOS-based model(COS stands for\nconcrete operational semantics)f\nCOS\nΠ\nto formally describe the expected input-\noutput relation. Here, for simplicity,fis restricted to one that does not take\nlifetime parameters (we call such a functionsimple; the input/output types\nof a simple function cannot contain references). We definef\nCOS\nΠ\nas the pred-\nicate (on values of sorts (|T\n0\n|),...,(|T\nn−1\n|),(|U|) iff’s input/output types are\nT\n0\n,...,T\nn−1\n,U) given by the following rule.\nC\n0\n→\nΠ\n···→\nΠ\nC\nN\nfinal\nΠ\n(C\nN\n)C\n0\n= [f,entry]F|H C\nN\n= [f,L]F\n′\n|H\n′\nsafe\nH\n(\nF::Γ\nΠ,f,entry\n∣\n∣\n{(x\ni\n,v\ni\n)|i∈[n]}\n)\nsafe\nH\n′\n(\nF\n′\n::Γ\nΠ,f,L\n∣\n∣\n{(y,w)}\n)\nf\nCOS\nΠ\n(v\n0\n,...,v\nn−1\n,w)\nΓ\nΠ,f,L\n: the variable context for the labelLoffin the programΠ\nCorrectness Theorem.Finally, the correctness (both soundness and com-\npleteness) of the CHC representation is simply stated as follows.\nTheorem 1 (Correctness of the CHC Representation).For any program\nΠand simple functionfinΠ,f\nCOS\nΠ\nis equivalent toM\nleast\n(|Π|)\n(f\nentry\n).\nProof.The details are presented in Appendix C. We outline the proof below.\nFirst, we introduceabstract operational semantics(Appendix C.1), where we\nget rid of heaps and directly represent each variable in the program simply as\na value withabstract variables, which is strongly related toprophecy variables\n(see§5). An abstract variable represents the undetermined value of a mutable\nreference at the end of borrow.\nNext, we introduceSLDC resolution(Appendix C.3) for CHC systems and\nfind abisimulationbetween abstract operational semantics and SLDC resolution\n(Lemma 3), whereby we show that theAOS-based model, defined analogously\nto the COS-based model, isequivalentto the least model of the CHC repre-\nsentation (Theorem 2). Moreover, we find abisimulationbetween concrete and\nabstract operational semantics (Lemma 5) and prove that the COS-based model\nisequivalentto the AOS-based model (Theorem 3).\nFinally, combining the equivalences of Theorem 2 and Theorem 3, we achieve\nthe proof for the correctness of the CHC representation.ut\n\n18Y. Matsushita et al.\nInterestingly, as by-products of the proof, we have also shown thesoundness\nof the type systemin terms of preservation and progression, in both concrete and\nabstract operational semantics. See Appendix C.2 and Appendix C.4 for details.\nSimplification and generalization of the proofs is left for future work.\n3.4 Advanced Examples\nWe give advanced examples of pointer-manipulating Rust programs and their\nCHC representations. For readability, we write programs in Rust (with ghost\nannotations) instead of COR. In addition, CHCs are written in an informal style\nlike§1, preferring equalities to pattern matching.\nExample 3.Consider the following program, a variant ofjust_recin§1.1.\nfn choose<'a>(ma: &'a mut i32, mb: &'a mut i32) -> &'a mut i32 {\nif rand() {drop ma;mb } else {drop mb;ma }\n}\nfn linger_dec<'a>(ma: &'a mut i32) -> bool {\n*ma -= 1; if rand() >= 0 {drop ma;return true; }\nlet mut b = rand(); let old_b = b;intro 'b;let mb = &'bmut b;\nlet r2 = linger_dec<'b>(choose<'b>(ma, mb));now 'b;\nr2 && old_b >= b\n}\nUnlikejust_rec, the functionlinger_deccan modify the local variable of an\narbitrarily deep ancestor. Interestingly, each recursive call tolinger_deccan\nintroduce a new lifetime'b, which yields arbitrarily many layers of lifetimes.\nSuppose we wish to verify thatlinger_decnever returnsfalse. If we use,\nlikeJustRec\n+\nin§1.1, a predicate taking the memory statesh,h\n′\nand the stack\npointersp, we have to discover the quantified invariant:∀i≤sp.h[i]≥h\n′\n[i]. In\ncontrast, our approach reduces this verification problem to the following CHCs:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=b\n◦\n=b∧r=〈a,a\n◦\n〉\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r)⇐=a\n◦\n=a∧r=〈b,b\n◦\n〉\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧a\n◦\n=a\n′\n∧r=true\nLingerDec(〈a,a\n◦\n〉,r)⇐=a\n′\n=a−1∧oldb=b∧Choose(〈a\n′\n,a\n◦\n〉,〈b,b\n◦\n〉,mc)\n∧LingerDec(mc,r\n′\n)∧r= (r\n′\n&&oldb>=b\n◦\n)\nr=true⇐=LingerDec(〈a,a\n◦\n〉,r).\nThis can be solved by many solvers since it has a very simple model:\nChoose(〈a,a\n◦\n〉,〈b,b\n◦\n〉,r) :⇐⇒(b\n◦\n=b∧r=〈a,a\n◦\n〉)∨(a\n◦\n=a∧r=〈b,b\n◦\n〉)\nLingerDec(〈a,a\n◦\n〉,r) :⇐⇒r=true∧a≥a\n◦\n.\nExample 4.Combined withrecursive data structures, our method turns out to\nbe more interesting. Let us consider the following Rust code:\n22\n22\nIn COR,Listcan be expressed asμX.int×ownX+unit.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)19\nenum List { Cons(i32, Box), Nil } use List::*;\nfn take_some<'a>(mxs: &'a mut List) -> &'a mut i32 {\nmatch mxs {\nCons(mx, mxs2) => if rand() {drop mxs2;mx }\nelse {drop mx;take_some<'a>(mxs2) }\nNil => { take_some(mxs) }\n}\n}\nfn sum(xs: &List) -> i32 {\nmatch xs { Cons(x, xs2) => x + sum(xs2), Nil => 0 }\n}\nfn inc_some(mut xs: List) -> bool {\nlet n = sum(&xs);intro 'a;let my = take_some<'a>(&'amut xs);\n*my += 1;drop my; now 'a;let m = sum(&xs); m == n + 1\n}\nThis is a program that manipulates singly linked integer lists, defined as a re-\ncursive data type.take_sometakes a mutable reference to a list and returns\na mutable reference to some element of the list.sumcalculates the sum of the\nelements of a list.inc_someincrements some element of a list via a mutable\nreference and checks that the sum of the elements of the list has increased by1.\nSuppose we wish to verify thatinc_somenever returnsfalse. Our method\ntranslates this verification problem into the following CHCs.\n23\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧xs\n′\n◦\n=xs\n′\n∧r=〈x,x\n◦\n〉\nTakeSome(〈[x|xs\n′\n],xs\n◦\n〉,r)⇐=xs\n◦\n= [x\n◦\n|xs\n′\n◦\n]∧x\n◦\n=x∧TakeSome(〈xs\n′\n,xs\n′\n◦\n〉,r)\nTakeSome(〈[],xs\n◦\n〉,r)⇐=TakeSome(〈[],xs\n◦\n〉,r)\nSum(〈[x|xs\n′\n]〉,r)⇐=Sum(〈xs\n′\n〉,r\n′\n)∧r=x+r\n′\nSum(〈[]〉,r)⇐=r= 0\nIncSome(xs,r)⇐=Sum(〈xs〉,n)∧TakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉)∧y\n◦\n=y+ 1\n∧Sum(〈xs\n◦\n〉,m)∧r= (m==n+1).\nA crucial technique used here issubdivision of a mutable reference, which is\nachieved with the constraintxs\n◦\n= [x\n◦\n|xs\n′\n◦\n].\nWe can give this CHC system a very simple model, using an auxiliary function\nsum(satisfyingsum([x|xs\n′\n]) :=x+sum(xs\n′\n),sum([]) := 0):\nTakeSome(〈xs,xs\n◦\n〉,〈y,y\n◦\n〉) :⇐⇒y\n◦\n−y=sum(xs\n◦\n)−sum(xs)\nSum(〈xs〉,r) :⇐⇒r=sum(xs)\nIncSome(xs,r) :⇐⇒r=true.\nAlthough the model relies on the functionsum, the validity of the model can be\nchecked without induction onsum(i.e. we can check the validity of each CHC\njust by properly unfolding the definition ofsuma few times).\nThe example can befully automatically and promptlyverified by our approach\nusing HoIce [12,11] as the back-end CHC solver; see§4.\n23\n[x|xs] is the cons made of the headxand the tailxs. [] is the nil. In our formal\nlogic, they are expressed asinj\n0\n(x,〈xs〉) andinj\n1\n().\n\n20Y. Matsushita et al.\n3.5 Discussions\nWe discuss here how our idea can be extended and enhanced.\nApplying Various Verification Techniques.Our idea can also be expressed as a\ntranslation of a pointer-manipulating Rust program into a program of astateless\nfunctional programming language, which allows us to usevarious verification\ntechniquesnot limited to CHCs. Access to future information can be modeled\nusingnon-determinism. To express the valuea\n◦\ncoming at the end of mutable\nborrow in CHCs, we justrandomly guessthe value with non-determinism. At\nthe time we actually release a mutable reference, we justchecka' = aand cut\noff execution branches that do not pass the check.\nFor example,take_max/inc_maxin§1.2/Example 1 can be translated into\nthe following OCaml program.\nlet rec assume b = if b then () else assume b\nlet take_max (a, a') (b, b') =\nif a >= b then (assume (b' = b); (a, a'))\nelse (assume (a' = a); (b, b'))\nlet inc_max a b =\nlet a' = Random.int(0) in let b' = Random.int(0) in\nlet (c, c') = take_max (a, a') (b, b') in\nassume (c' = c + 1); not (a' = b')\nlet main a b = assert (inc_max a b)\n‘let a' = Random.int(0)’ expresses arandom guessand ‘assume (a' = a)’\nexpresses acheck. The original problem “Doesinc_maxnever returnfalse?”\nis reduced to the problem “Doesmainnever fail at assertion?” on the OCaml\nprogram.\n24\nThis representation allows us to use various verification techniques, including\nmodel checking (higher-order, temporal, bounded, etc.), semi-automated verifi-\ncation (e.g. on Boogie [48]) and verification on proof assistants (e.g. Coq [15]).\nThe property to be verified can be not only partial correctness, but also total\ncorrectness and liveness. Further investigation is left for future work.\nVerifying Higher-order Programs.We have to care about the following points in\nmodeling closures:(i)A closure that encloses mutable references can be encoded\nas a pair of the main function and the ‘drop function’ called when the closure is\nreleased;(ii)A closure that updates enclosed data can be encoded as a function\nthat returns, with the main return value, the updated version of the closure;\n(iii)A closure that updates external data through enclosed mutable references\ncan also be modeled by combination of (i) and (ii). Further investigation on\nverification of higher-order Rust programs is left for future work.\n24\nMoCHi [39], a higher-order model checker for OCaml, successfully verified the safety\nproperty for the OCaml representation above. It also successfully and instantly ver-\nified a similar representation ofchoose/linger_decat Example 3.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)21\nLibraries with Unsafe Code.Our translation does not use lifetime information;\nthe correctness of our method is guaranteed by the nature of borrow. Whereas\nlifetimes are used forstatic checkof the borrow discipline, many libraries in Rust\n(e.g.RefCell) provide a mechanism fordynamic ownership check.\nWe believe that such libraries withunsafe codecan be verified for our method\nby a separation logic such as Iris [35,33], as RustBelt [32] does. A good news\nis that Iris has recently incorporatedprophecy variables[34], which seems to fit\nwell with our approach. This is an interesting topic for future work.\nAfter the libraries are verified, we can turn to our method. For an easy\nexample,Vec[58] can be represented simply as a functional array; a muta-\nble/immutable slice&mut[T]/&[T]can be represented as an array of muta-\nble/immutable references. For another example, to deal withRefCell[56], we\npass around anarraythat maps aRefCelladdress to data of typeTequipped\nwith an ownership counter;RefCellitself is modeled simply as an address.\n2526\nImportantly,at the very time we take a mutable reference〈a,a\n◦\n〉from a ref-cell,\nthe data at the array should be updated intoa\n◦\n. Using methods such as pointer\nanalysis [61], we can possibly shrink the array.\nStill, our method does not go quite well withmemory leaks[52] caused for\nexample by combination ofRefCellandRc[57], because they obfuscate the\nownership release of mutable references. We think that use ofRcetc. should\nrather be restricted for smooth verification. Further investigation is needed.\n4 Implementation and Evaluation\nWe report on the implementation of our verification tool and the preliminary\nexperiments conducted with small benchmarks to confirm the effectiveness of\nour approach.\n4.1 Implementation of RustHorn\nWe implemented a prototype verification toolRustHorn(available athttps:\n//github.com/hopv/rust-horn) based on the ideas described above. The tool\nsupports basic features of Rust supported in COR, including recursions and\nrecursive types especially.\nThe implementation translates the MIR (Mid-level Intermediate Representa-\ntion) [45,51] of a Rust program into CHCs quite straightforwardly.\n27\nThanks to\nthe nature of the translation, RustHorn can just rely on Rust’s borrow check and\nforget about lifetimes. For efficiency, the predicate variables are constructed by\n25\nTo borrow a mutable/immutable reference fromRefCell, we check and update the\ncounter and take out the data from the array.\n26\nIn Rust, we can useRefCellto naturally encode data types with circular references\n(e.g. doubly-linked lists).\n27\nIn order to use the MIR, RustHorn’s implementation depends on the unstable\nnightly version of the Rust compiler, which causes a slight portability issue.\n\n22Y. Matsushita et al.\nthe granularity of the vertices in the control-flow graph in MIR, unlike the per-\nlabel construction of§3.2. Also, assertions in functions are taken into account\nunlike the formalization in§3.2.\n4.2 Benchmarks and Experiments\nTo measure the performance of RustHorn and the existing CHC-based verifier\nSeaHorn [23], we conducted preliminary experiments with benchmarks listed in\nTable 1. Each benchmark program is designed so that the Rust and C versions\nmatch. Each benchmark instance consists of either one program or a pair of safe\nand unsafe programs that are very similar to each other. The benchmarks and\nexperimental results are accessible athttps://github.com/hopv/rust-horn.\nThe benchmarks in the groupssimpleandbmcwere taken from SeaHorn\n(https://github.com/seahorn/seahorn/tree/master/test), with the Rust\nversions written by us. They have been chosen based on the following criteria:\nthey (i) consist of only features supported by core Rust, (ii) follow Rust’s owner-\nship discipline, and (iii) are small enough to be amenable for manual translation\nfrom C to Rust.\nThe remaining six benchmark groups are built by us and consist of programs\nfeaturing mutable references. The groupsinc-max,just-recandlinger-dec\nare based on the examples that have appeared in§1 and§3.4. The group\nswap-decconsists of programs that perform repeated involved updates via mu-\ntable references to mutable references. The groupslistsandtreesfeature\ndestructive updates on recursive data structures (lists and trees) via mutable\nreferences, with one interesting program of it explained in§3.4.\nWe conducted experiments on a commodity laptop (2.6GHz Intel Core i7\nMacBook Pro with 16GB RAM). First we translated each benchmark program\nby RustHorn and SeaHorn (version 0.1.0-rc3) [23] translate into CHCs in the\nSMT-LIB 2 format. Both RustHorn and SeaHorn generated CHCs sufficiently\nfast (about 0.1 second for each program). After that, we measured the time of\nCHC solving by Spacer [40] in Z3 (version 4.8.7) [69] and HoIce (version 1.8.1)\n[12,11] for the generated CHCs. SeaHorn’s outputs were not accepted by HoIce,\nespecially because SeaHorn generates CHCs with arrays. We also made modified\nversions for some of SeaHorn’s CHC outputs, adding constraints on address\nfreshness, to improve accuracy of representations and reduce false alarms.\n28\n4.3 Experimental Results\nTable 1 shows the results of the experiments.\nInterestingly, the combination of RustHorn and HoIce succeeded in verify-\ning many programs with recursive data types (listsandtrees), although it\n28\nForbase/3andrepeat/3ofinc-max, the address-taking parts were already re-\nmoved, probably by inaccurate pointer analysis.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)23\nRustHornSeaHornw/Spacer\nGroupInstancePropertyw/Spacer w/HoIceas ismodified\nsimple\n01safe<0.1<0.1<0.1\n04-recursivesafe0.5timeout0.8\n05-recursiveunsafe<0.1<0.1<0.1\n06-loopsafetimeout0.1timeout\nhhk2008safetimeout40.5<0.1\nunique-scalarunsafe\n<0.1<0.1<0.1\nbmc\n1\nsafe0.2<0.1<0.1\nunsafe0.2<0.1<0.1\n2\nsafetimeout0.1<0.1\nunsafe<0.1<0.1<0.1\n3\nsafe<0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-1\nsafe0.1<0.1<0.1\nunsafe<0.1<0.1<0.1\ndiamond-2\nsafe0.2<0.1<0.1\nunsafe<0.1<0.1<0.1\ninc-max\nbase\nsafe\n<0.1<0.1false alarm<0.1\nunsafe<0.1<0.1<0.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe0.1<0.1<0.1\nrepeat\nsafe\n0.1timeoutfalse alarm0.1\nunsafe\n<0.10.4<0.1<0.1\nrepeat/3\nsafe\n0.2timeout<0.1\nunsafe\n<0.11.3<0.1\nswap-dec\nbase\nsafe<0.1<0.1false alarm<0.1\nunsafe\n0.1timeout<0.1<0.1\nbase/3\nsafe0.2timeoutfalse alarm<0.1\nunsafe\n0.40.9<0.10.1\nexact\nsafe0.10.5false alarm timeout\nunsafe\n<0.126.0<0.1<0.1\nexact/3\nsafetimeout timeoutfalse alarm false alarm\nunsafe\n<0.10.4<0.1<0.1\njust-rec base\nsafe<0.1<0.1<0.1\nunsafe<0.10.1<0.1\nlinger-dec\nbase\nsafe<0.1<0.1false alarm\nunsafe<0.10.1<0.1\nbase/3\nsafe<0.1<0.1false alarm\nunsafe<0.17.0<0.1\nexact\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.2<0.1\nexact/3\nsafe\n<0.1<0.1false alarm\nunsafe<0.10.6<0.1\nlists\nappend\nsafetool error<0.1false alarm\nunsafetool error0.20.1\ninc-all\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.3<0.1\ninc-some\nsafe\ntool error<0.1false alarm\nunsafe\ntool error0.30.1\ninc-some/2\nsafetool error timeoutfalse alarm\nunsafetool error0.30.4\ntrees\nappend-t\nsafetool error<0.1timeout\nunsafetool error0.30.1\ninc-all-t\nsafetool error timeouttimeout\nunsafetool error0.1<0.1\ninc-some-t\nsafetool error timeouttimeout\nunsafetool error0.30.1\ninc-some/2-t\nsafetool error timeoutfalse alarm\nunsafetool error0.40.1\nTable 1.Benchmarks and experimental results on RustHorn and SeaHorn, with\nSpacer/Z3 and HoIce. “timeout” denotes timeout of 180 seconds; “false alarm” means\nreporting ‘unsafe’ for a safe program; “tool error” is a tool error of Spacer, which\ncurrently does not deal with recursive types well.\n\n24Y. Matsushita et al.\nfailed at difficult programs.\n29\nHoIce, unlike Spacer, can find models defined with\nprimitive recursive functions for recursive data types.\n30\nFalse alarms of SeaHorn for the last six groups are mainly due to problematic\napproximation of SeaHorn for pointers and heap memories, as discussed in§1.1.\nOn the modified CHC outputs of SeaHorn, five false alarms were erased and four\nof them became successful. For the last four groups, unboundedly many mem-\nory cells can be allocated, which imposes a fundamental challenge for SeaHorn’s\narray-based approach as discussed in§1.1.\n31\nThe combination of RustHorn and\nHoIce took a relatively long time or reported timeout for some programs, includ-\ning unsafe ones, because HoIce is still an unstable tool compared to Spacer; in\ngeneral, automated CHC solving can be rather unstable.\n5 Related Work\nCHC-based Verification of Pointer-Manipulating Programs.SeaHorn [23] is a\nrepresentative existing tool for CHC-based verification of pointer-manipulating\nprograms. It basically represents the heap memory as an array. Although some\npointer analyses [24] are used to optimize the array representation of the heap,\ntheir approach suffers from the scalability problem discussed in§1.1, as confirmed\nby the experiments in§4. Still, their approach is quite effective as automated\nverification, given that many real-world pointer-manipulating programs do not\nfollow Rust-style ownership.\nAnother approach is taken by JayHorn [37,36], which translates Java pro-\ngrams (possibly using object pointers) to CHCs. They represent store invariants\nusing special predicatespullandpush. Although this allows faster reasoning\nabout the heap than the array-based approach, it can suffer from more false\nalarms. We conducted a small experiment for JayHorn (0.6-alpha) on some of\nthe benchmarks of§4.2; unexpectedly, JayHorn reported ‘UNKNOWN’ (instead of\n‘SAFE’ or ‘UNSAFE’) for even simple programs such as the programs of the instance\nunique-scalarinsimpleand the instancebasicininc-max.\nVerification for Rust.Whereas we have presented the first CHC-based (fully au-\ntomated) verification method specially designed for Rust-style ownership, there\nhave been a number of studies on other types of verification for Rust.\nRustBelt [32] aims to formally prove high-level safety properties for Rust\nlibraries with unsafe internal implementation, using manual reasoning on the\nhigher-order concurrent separation logic Iris [35,33] on the Coq Proof Assistant\n[15]. Although their framework is flexible, the automation of the reasoning on\n29\nFor example,inc-some/2takes two mutable references in a list and increments on\nthem;inc-all-tdestructively increments all elements in a tree.\n30\nWe used the latest version of HoIce, whose algorithm for recursive types is presented\nin the full paper of [11].\n31\nWe also tried on SpacerJustRec\n+\n, the stack-pointer-based accurate representation\nofjust_recpresented in§1.1, but we got timeout of 180 seconds.\n\nRustHorn: CHC-based Verification for Rust Programs (full version)25\nthe framework is little discussed. The language design of our COR is affected by\ntheir formal calculusλ\nRust\n.\nElectrolysis [67] translates some subset of Rust into a purely functional pro-\ngramming language to manually verify functional correctness on Lean Theorem\nProver [49]. Although it clears out pointers to get simple models like our ap-\nproach, Electrolysis’ applicable scope is quite limited, because it deals with mu-\ntable references bysimple static tracking of addresses based on lenses[20], not\nsupporting even basic use cases such as dynamic selection of mutable references\n(e.g.take_maxin§1.2) [66], which our method can easily handle. Our approach\ncoversallusages of pointers of the safe core of Rust as discussed in§3.\nSome serial studies [27,3,17] conduct (semi-)automated verification on Rust\nprograms using Viper [50], a verification platform based on separation logic with\nfractional ownership. This approach can to some extent deal with unsafe code\n[27] and type traits [17]. Astrauskas et al. [3] conduct semi-automated verifi-\ncation (manually providing pre/post-conditions and loop invariants) on many\nrealistic examples. Because Viper is based onfractional ownership, however,\ntheir platforms have to useconcrete indexing on the memoryfor programs like\ntake_max/inc_max. In contrast, our idea leveragesborrow-based ownership, and\nit can be applied also to semi-automated verification as suggested in§3.5.\nSome researches [65,4,44] employ bounded model checking on Rust programs,\nespecially with unsafe code. Our method can be applied to bounded model check-\ning as discussed in§3.5.\nVerification using Ownership.Ownership has been applied to a wide range of\nverification. It has been used for detecting race conditions on concurrent pro-\ngrams [8,64] and analyzing the safety of memory allocation [63]. Separation logic\nbased on ownership is also studied well [7,50,35]. Some verification platforms\n[14,5,21] support simple ownership. However, most prior studies on ownership-\nbased verification are based on fractional or counting ownership. Verification\nunderborrow-based ownershiplike Rust was little studied before our work.\nProphecy Variables.Our idea of taking a future value to represent a mutable\nreference is linked to the notion ofprophecy variables[1,68,34]. Jung et al. [34]\npropose a new Hoare-style logic with prophecy variables. In their logic, prophecy\nvariables are not copyable, which is analogous to uncopyability of mutable ref-\nerences in Rust. This logic can probably be used for generalizing our idea as\nsuggested in§3.5.\n6 Conclusion\nWe have proposed a novel method for CHC-based program verification, which\nrepresents a mutable reference as a pair of values, the current value and the\nfuture value at the time of release. We have formalized the method for a core\nlanguage of Rust and proved its correctness. We have implemented a proto-\ntype verification tool for a subset of Rust and confirmed the effectiveness of our\n\n26Y. Matsushita et al.\napproach. We believe that this study establishes the foundation of verification\nleveraging borrow-based ownership.\nAcknowledgments.This work was supported by JSPS KAKENHI Grant\nNumber JP15H05706 and JP16K16004. We are grateful to the anonymous re-\nviewers for insightful comments.\nReferences\n1. Abadi, M., Lamport, L.: The existence of refinement mappings. Theor. Comput.\nSci.82(2), 253–284 (1991). https://doi.org/10.1016/0304-3975(91)90224-P\n2. Alberti, F., Bruttomesso, R., Ghilardi, S., Ranise, S., Sharygina, N.: Lazy ab-\nstraction with interpolants for arrays. In: Bjørner, N., Voronkov, A. (eds.)\nLogic for Programming, Artificial Intelligence, and Reasoning - 18th Interna-\ntional Conference, LPAR-18, M ́erida, Venezuela, March 11-15, 2012. Proceed-\nings. Lecture Notes in Computer Science, vol. 7180, pp. 46–61. Springer (2012).\nhttps://doi.org/10.1007/978-3-642-28717-6\n7\n3. Astrauskas, V., M ̈uller, P., Poli, F., Summers, A.J.: Leveraging Rust types\nfor modular specification and verification (2018). https://doi.org/10.3929/ethz-b-\n000311092\n4. Baranowski, M.S., He, S., Rakamaric, Z.: Verifying Rust programs with SMACK.\nIn: Lahiri and Wang [42], pp. 528–535. https://doi.org/10.1007/978-3-030-01090-\n432\n5. Barnett, M., F ̈ahndrich, M., Leino, K.R.M., M ̈uller, P., Schulte, W., Venter, H.:\nSpecification and verification: The Spec# experience. Commun. ACM54(6), 81–91\n(2011). https://doi.org/10.1145/1953122.1953145\n6. Bjørner, N., Gurfinkel, A., McMillan, K.L., Rybalchenko, A.: Horn clause\nsolvers for program verification. In: Beklemishev, L.D., Blass, A., Dershowitz,\nN., Finkbeiner, B., Schulte, W. (eds.) Fields of Logic and Computation II\n- Essays Dedicated to Yuri Gurevich on the Occasion of His 75th Birthday.\nLecture Notes in Computer Science, vol. 9300, pp. 24–51. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-23534-9\n2\n7. Bornat, R., Calcagno, C., O’Hearn, P.W., Parkinson, M.J.: Permission accounting\nin separation logic. In: Palsberg, J., Abadi, M. (eds.) Proceedings of the 32nd\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2005, Long Beach, California, USA, January 12-14, 2005. pp. 259–270. ACM\n(2005). https://doi.org/10.1145/1040305.1040327\n8. Boyapati, C., Lee, R., Rinard, M.C.: Ownership types for safe program-\nming: Preventing data races and deadlocks. In: Ibrahim, M., Matsuoka,\nS. (eds.) Proceedings of the 2002 ACM SIGPLAN Conference on Object-\nOriented Programming Systems, Languages and Applications, OOPSLA 2002,\nSeattle, Washington, USA, November 4-8, 2002. pp. 211–230. ACM (2002).\nhttps://doi.org/10.1145/582419.582440\n9. Boyland, J.: Checking interference with fractional permissions. In: Cousot, R. (ed.)\nStatic Analysis, 10th International Symposium, SAS 2003, San Diego, CA, USA,\nJune 11-13, 2003, Proceedings. Lecture Notes in Computer Science, vol. 2694, pp.\n55–72. Springer (2003). https://doi.org/10.1007/3-540-44898-5\n4\n\nRustHorn: CHC-based Verification for Rust Programs (full version)27\n10. Bradley, A.R., Manna, Z., Sipma, H.B.: What’s decidable about arrays? In: Emer-\nson, E.A., Namjoshi, K.S. (eds.) Verification, Model Checking, and Abstract In-\nterpretation, 7th International Conference, VMCAI 2006, Charleston, SC, USA,\nJanuary 8-10, 2006, Proceedings. Lecture Notes in Computer Science, vol. 3855,\npp. 427–442. Springer (2006). https://doi.org/10.1007/11609773\n28\n11. Champion, A., Chiba, T., Kobayashi, N., Sato, R.: ICE-based refinement type\ndiscovery for higher-order functional programs. In: Beyer, D., Huisman, M. (eds.)\nTools and Algorithms for the Construction and Analysis of Systems - 24th Interna-\ntional Conference, TACAS 2018, Held as Part of the European Joint Conferences\non Theory and Practice of Software, ETAPS 2018, Thessaloniki, Greece, April 14-\n20, 2018, Proceedings, Part I. Lecture Notes in Computer Science, vol. 10805, pp.\n365–384. Springer (2018). https://doi.org/10.1007/978-3-319-89960-2\n20\n12. Champion, A., Kobayashi, N., Sato, R.: HoIce: An ICE-based non-linear Horn\nclause solver. In: Ryu, S. (ed.) Programming Languages and Systems - 16th Asian\nSymposium, APLAS 2018, Wellington, New Zealand, December 2-6, 2018, Pro-\nceedings. Lecture Notes in Computer Science, vol. 11275, pp. 146–156. Springer\n(2018). https://doi.org/10.1007/978-3-030-02768-1\n8\n13. Clarke, D.G., Potter, J., Noble, J.: Ownership types for flexible alias protection.\nIn: Freeman-Benson, B.N., Chambers, C. (eds.) Proceedings of the 1998 ACM\nSIGPLAN Conference on Object-Oriented Programming Systems, Languages &\nApplications (OOPSLA ’98), Vancouver, British Columbia, Canada, October 18-\n22, 1998. pp. 48–64. ACM (1998). https://doi.org/10.1145/286936.286947\n14. Cohen, E., Dahlweid, M., Hillebrand, M.A., Leinenbach, D., Moskal, M., Santen,\nT., Schulte, W., Tobies, S.: VCC: A practical system for verifying concurrent C. In:\nBerghofer, S., Nipkow, T., Urban, C., Wenzel, M. (eds.) Theorem Proving in Higher\nOrder Logics, 22nd International Conference, TPHOLs 2009, Munich, Germany,\nAugust 17-20, 2009. Proceedings. Lecture Notes in Computer Science, vol. 5674,\npp. 23–42. Springer (2009). https://doi.org/10.1007/978-3-642-03359-9\n2\n15. Coq Team: The Coq proof assistant (2020),https://coq.inria.fr/\n16. van Emden, M.H., Kowalski, R.A.: The semantics of predicate logic as\na programming language. Journal of the ACM23(4), 733–742 (1976).\nhttps://doi.org/10.1145/321978.321991\n17. Erdin, M.: Verification of Rust Generics, Typestates, and Traits. Master’s thesis,\nETH Z ̈urich (2019)\n18. Fedyukovich, G., Kaufman, S.J., Bod ́ık, R.: Sampling invariants from frequency\ndistributions. In: Stewart, D., Weissenbacher, G. (eds.) 2017 Formal Methods in\nComputer Aided Design, FMCAD 2017, Vienna, Austria, October 2-6, 2017. pp.\n100–107. IEEE (2017). https://doi.org/10.23919/FMCAD.2017.8102247\n19. Fedyukovich, G., Prabhu, S., Madhukar, K., Gupta, A.: Quantified invariants via\nsyntax-guided synthesis. In: Dillig, I., Tasiran, S. (eds.) Computer Aided Verifica-\ntion - 31st International Conference, CAV 2019, New York City, NY, USA, July\n15-18, 2019, Proceedings, Part I. Lecture Notes in Computer Science, vol. 11561,\npp. 259–277. Springer (2019). https://doi.org/10.1007/978-3-030-25540-4\n14\n20. Foster, J.N., Greenwald, M.B., Moore, J.T., Pierce, B.C., Schmitt, A.: Com-\nbinators for bidirectional tree transformations: A linguistic approach to the\nview-update problem. ACM Trans. Program. Lang. Syst.29(3),17 (2007).\nhttps://doi.org/10.1145/1232420.1232424\n21. Gondelman, L.: Un syst`eme de types pragmatique pour la v ́erification d ́eductive des\nprogrammes. (A Pragmatic Type System for Deductive Verification). Ph.D. thesis,\nUniversity of Paris-Saclay, France (2016),https://tel.archives-ouvertes.fr/\ntel-01533090\n\n28Y. Matsushita et al.\n22. Grebenshchikov, S., Lopes, N.P., Popeea, C., Rybalchenko, A.: Synthesizing soft-\nware verifiers from proof rules. In: Vitek, J., Lin, H., Tip, F. (eds.) ACM\nSIGPLAN Conference on Programming Language Design and Implementation,\nPLDI ’12, Beijing, China - June 11 - 16, 2012. pp. 405–416. ACM (2012).\nhttps://doi.org/10.1145/2254064.2254112\n23. Gurfinkel, A., Kahsai, T., Komuravelli, A., Navas, J.A.: The SeaHorn verification\nframework. In: Kroening, D., Pasareanu, C.S. (eds.) Computer Aided Verification\n- 27th International Conference, CAV 2015, San Francisco, CA, USA, July 18-\n24, 2015, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9206, pp.\n343–361. Springer (2015). https://doi.org/10.1007/978-3-319-21690-4\n20\n24. Gurfinkel, A., Navas, J.A.: A context-sensitive memory model for verification of\nC/C++ programs. In: Ranzato, F. (ed.) Static Analysis - 24th International Sym-\nposium, SAS 2017, New York, NY, USA, August 30 - September 1, 2017, Proceed-\nings. Lecture Notes in Computer Science, vol. 10422, pp. 148–168. Springer (2017).\nhttps://doi.org/10.1007/978-3-319-66706-5\n8\n25. Gurfinkel, A., Shoham, S., Meshman, Y.: SMT-based verification of parameterized\nsystems. In: Zimmermann, T., Cleland-Huang, J., Su, Z. (eds.) Proceedings of\nthe 24th ACM SIGSOFT International Symposium on Foundations of Software\nEngineering, FSE 2016, Seattle, WA, USA, November 13-18, 2016. pp. 338–348.\nACM (2016). https://doi.org/10.1145/2950290.2950330\n26. Gurfinkel, A., Shoham, S., Vizel, Y.: Quantifiers on demand. In: Lahiri and Wang\n[42], pp. 248–266. https://doi.org/10.1007/978-3-030-01090-415\n27. Hahn, F.: Rust2Viper: Building a Static Verifier for Rust. Master’s thesis, ETH\nZ ̈urich (2016). https://doi.org/10.3929/ethz-a-010669150\n28. Hoenicke, J., Majumdar, R., Podelski, A.: Thread modularity at many levels: A\npearl in compositional verification. In: Castagna, G., Gordon, A.D. (eds.) Pro-\nceedings of the 44th ACM SIGPLAN Symposium on Principles of Programming\nLanguages, POPL 2017, Paris, France, January 18-20, 2017. pp. 473–485. ACM\n(2017). https://doi.org/10.1145/3009837\n29. Hojjat, H., R ̈ummer, P.: TheEldaricaHorn solver. In: Bjørner, N., Gurfinkel,\nA. (eds.) 2018 Formal Methods in Computer Aided Design, FMCAD 2018,\nAustin, TX, USA, October 30 - November 2, 2018. pp. 1–7. IEEE (2018).\nhttps://doi.org/10.23919/FMCAD.2018.8603013\n30. Horn, A.: On sentences which are true of direct unions of algebras. The Journal of\nSymbolic Logic16(1), 14–21 (1951),http://www.jstor.org/stable/2268661\n31. Jim, T., Morrisett, J.G., Grossman, D., Hicks, M.W., Cheney, J., Wang, Y.: Cy-\nclone: A safe dialect of C. In: Ellis, C.S. (ed.) Proceedings of the General Track:\n2002 USENIX Annual Technical Conference, June 10-15, 2002, Monterey, Califor-\nnia, USA. pp. 275–288. USENIX (2002),http://www.usenix.org/publications/\nlibrary/proceedings/usenix02/jim.html\n32. Jung, R., Jourdan, J., Krebbers, R., Dreyer, D.: RustBelt: Securing the founda-\ntions of the Rust programming language. PACMPL2(POPL), 66:1–66:34 (2018).\nhttps://doi.org/10.1145/3158154\n33. Jung, R., Krebbers, R., Jourdan, J., Bizjak, A., Birkedal, L., Dreyer, D.: Iris from\nthe ground up: A modular foundation for higher-order concurrent separation logic.\nJ. Funct. Program.28, e20 (2018). https://doi.org/10.1017/S0956796818000151\n34. Jung, R., Lepigre, R., Parthasarathy, G., Rapoport, M., Timany, A., Dreyer, D.,\nJacobs, B.: The future is ours: Prophecy variables in separation logic. PACMPL\n4(POPL), 45:1–45:32 (2020). https://doi.org/10.1145/3371113\n\nRustHorn: CHC-based Verification for Rust Programs (full version)29\n35. Jung, R., Swasey, D., Sieczkowski, F., Svendsen, K., Turon, A., Birkedal, L.,\nDreyer, D.: Iris: Monoids and invariants as an orthogonal basis for concurrent\nreasoning. In: Rajamani, S.K., Walker, D. (eds.) Proceedings of the 42nd Annual\nACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages,\nPOPL 2015, Mumbai, India, January 15-17, 2015. pp. 637–650. ACM (2015).\nhttps://doi.org/10.1145/2676726.2676980\n36. Kahsai, T., Kersten, R., R ̈ummer, P., Sch ̈af, M.: Quantified heap invariants for\nobject-oriented programs. In: Eiter, T., Sands, D. (eds.) LPAR-21, 21st Interna-\ntional Conference on Logic for Programming, Artificial Intelligence and Reasoning,\nMaun, Botswana, May 7-12, 2017. EPiC Series in Computing, vol. 46, pp. 368–384.\nEasyChair (2017)\n37. Kahsai, T., R ̈ummer, P., Sanchez, H., Sch ̈af, M.: JayHorn: A framework for ver-\nifying Java programs. In: Chaudhuri, S., Farzan, A. (eds.) Computer Aided Ver-\nification - 28th International Conference, CAV 2016, Toronto, ON, Canada, July\n17-23, 2016, Proceedings, Part I. Lecture Notes in Computer Science, vol. 9779,\npp. 352–358. Springer (2016). https://doi.org/10.1007/978-3-319-41528-4\n19\n38. Kalra, S., Goel, S., Dhawan, M., Sharma, S.:Zeus: Analyzing safety of smart\ncontracts. In: 25th Annual Network and Distributed System Security Symposium,\nNDSS 2018, San Diego, California, USA, February 18-21, 2018. The Internet So-\nciety (2018)\n39. Kobayashi, N., Sato, R., Unno, H.: Predicate abstraction and CEGAR for higher-\norder model checking. In: Hall, M.W., Padua, D.A. (eds.) Proceedings of the 32nd\nACM SIGPLAN Conference on Programming Language Design and Implementa-\ntion, PLDI 2011, San Jose, CA, USA, June 4-8, 2011. pp. 222–233. ACM (2011).\nhttps://doi.org/10.1145/1993498.1993525\n40. Komuravelli, A., Gurfinkel, A., Chaki, S.: SMT-based model checking for recursive\nprograms. In: Biere, A., Bloem, R. (eds.) Computer Aided Verification - 26th Inter-\nnational Conference, CAV 2014, Held as Part of the Vienna Summer of Logic, VSL\n2014, Vienna, Austria, July 18-22, 2014. Proceedings. Lecture Notes in Computer\nScience, vol. 8559, pp. 17–34. Springer (2014). https://doi.org/10.1007/978-3-319-\n08867-9\n2\n41. Lahiri, S.K., Bryant, R.E.: Constructing quantified invariants via predicate ab-\nstraction. In: Steffen, B., Levi, G. (eds.) Verification, Model Checking, and Ab-\nstract Interpretation, 5th International Conference, VMCAI 2004, Venice, Italy,\nJanuary 11-13, 2004, Proceedings. Lecture Notes in Computer Science, vol. 2937,\npp. 267–281. Springer (2004). https://doi.org/10.1007/978-3-540-24622-0\n22\n42. Lahiri, S.K., Wang, C. (eds.): Automated Technology for Verification and Analysis\n- 16th International Symposium, ATVA 2018, Los Angeles, CA, USA, October\n7-10, 2018, Proceedings, Lecture Notes in Computer Science, vol. 11138. Springer\n(2018). https://doi.org/10.1007/978-3-030-01090-4\n43. Lattner, C., Adve, V.S.: Automatic pool allocation: Improving performance by\ncontrolling data structure layout in the heap. In: Sarkar, V., Hall, M.W. (eds.)\nProceedings of the ACM SIGPLAN 2005 Conference on Programming Language\nDesign and Implementation, Chicago, IL, USA, June 12-15, 2005. pp. 129–142.\nACM (2005). https://doi.org/10.1145/1065010.1065027\n44. Lindner, M., Aparicius, J., Lindgren, P.: No panic! Verification of Rust programs\nby symbolic execution. In: 16th IEEE International Conference on Industrial Infor-\nmatics, INDIN 2018, Porto, Portugal, July 18-20, 2018. pp. 108–114. IEEE (2018).\nhttps://doi.org/10.1109/INDIN.2018.8471992\n\n30Y. Matsushita et al.\n45. Matsakis, N.D.: Introducing MIR (2016),https://blog.rust-lang.org/2016/\n04/19/MIR.html\n46. Matsakis, N.D., Klock II, F.S.: The Rust language. In: Feldman, M., Taft, S.T.\n(eds.) Proceedings of the 2014 ACM SIGAda annual conference on High integrity\nlanguage technology, HILT 2014, Portland, Oregon, USA, October 18-21, 2014. pp.\n103–104. ACM (2014). https://doi.org/10.1145/2663171.2663188\n47. Matsushita, Y., Tsukada, T., Kobayashi, N.: RustHorn: CHC-based verification\nfor Rust programs (full version). In: M ̈uller, P. (ed.) Programming Languages and\nSystems - 29th European Symposium on Programming, ESOP 2020, Held as Part\nof the European Joint Conferences on Theory and Practice of Software, ETAPS\n2020, Dublin, Ireland, April 25-30, 2020, Proceedings. Lecture Notes in Computer\nScience, Springer (2020)\n48. Microsoft: Boogie: An intermediate verification language (2020),https:\n//www.microsoft.com/en-us/research/project/boogie-an-intermediate-\nverification-language/\n49. de Moura, L.M., Kong, S., Avigad, J., van Doorn, F., von Raumer, J.: The\nLean theorem prover (system description). In: Felty, A.P., Middeldorp, A.\n(eds.) Automated Deduction - CADE-25 - 25th International Conference on\nAutomated Deduction, Berlin, Germany, August 1-7, 2015, Proceedings. Lec-\nture Notes in Computer Science, vol. 9195, pp. 378–388. Springer (2015).\nhttps://doi.org/10.1007/978-3-319-21401-6\n26\n50. M ̈uller, P., Schwerhoff, M., Summers, A.J.: Viper: A verification infrastructure\nfor permission-based reasoning. In: Jobstmann, B., Leino, K.R.M. (eds.) Verifi-\ncation, Model Checking, and Abstract Interpretation - 17th International Con-\nference, VMCAI 2016, St. Petersburg, FL, USA, January 17-19, 2016. Proceed-\nings. Lecture Notes in Computer Science, vol. 9583, pp. 41–62. Springer (2016).\nhttps://doi.org/10.1007/978-3-662-49122-5\n2\n51. Rust Community: The MIR (Mid-level IR) (2020),https://rust-lang.github.\nio/rustc-guide/mir/index.html\n52. Rust Community: Reference cycles can leak memory - the Rust programming lan-\nguage (2020),https://doc.rust-lang.org/book/ch15-06-reference-cycles.\nhtml\n53. Rust Community: RFC 2025: Nested method calls (2020),https://rust-lang.\ngithub.io/rfcs/2025-nested-method-calls.html\n54. Rust Community: RFC 2094: Non-lexical lifetimes (2020),https://rust-lang.\ngithub.io/rfcs/2094-nll.html\n55. Rust Community: Rust programming language (2020),https://www.rust-lang.\norg/\n56. Rust Community: std::cell::RefCell - Rust (2020),https://doc.rust-lang.org/\nstd/cell/struct.RefCell.html\n57. Rust Community: std::rc::Rc - Rust (2020),https://doc.rust-lang.org/std/\nrc/struct.Rc.html\n58. Rust Community: std::vec::Vec - Rust (2020),https://doc.rust-lang.org/std/\nvec/struct.Vec.html\n59. Rust Community: Two-phase borrows (2020),https://rust-lang.github.io/\nrustc-guide/borrow_check/two_phase_borrows.html\n60. Sato, R., Iwayama, N., Kobayashi, N.: Combining higher-order model checking with\nrefinement type inference. In: Hermenegildo, M.V., Igarashi, A. (eds.) Proceedings\nof the 2019 ACM SIGPLAN Workshop on Partial Evaluation and Program Manip-\nulation, PEPM@POPL 2019, Cascais, Portugal, January 14-15, 2019. pp. 47–53.\nACM (2019). https://doi.org/10.1145/3294032.3294081\n\nRustHorn: CHC-based Verification for Rust Programs (full version)31\n61. Steensgaard, B.: Points-to analysis in almost linear time. In: Boehm, H., Jr., G.L.S.\n(eds.) Conference Record of POPL’96: The 23rd ACM SIGPLAN-SIGACT Sym-\nposium on Principles of Programming Languages, Papers Presented at the Sympo-\nsium, St. Petersburg Beach, Florida, USA, January 21-24, 1996. pp. 32–41. ACM\nPress (1996). https://doi.org/10.1145/237721.237727\n62. Stump, A., Barrett, C.W., Dill, D.L., Levitt, J.R.: A decision procedure for an ex-\ntensional theory of arrays. In: 16th Annual IEEE Symposium on Logic in Computer\nScience, Boston, Massachusetts, USA, June 16-19, 2001, Proceedings. pp. 29–37.\nIEEE Computer Society (2001). https://doi.org/10.1109/LICS.2001.932480\n63. Suenaga, K., Kobayashi, N.: Fractional ownerships for safe memory dealloca-\ntion. In: Hu, Z. (ed.) Programming Languages and Systems, 7th Asian Sym-\nposium, APLAS 2009, Seoul, Korea, December 14-16, 2009. Proceedings. Lec-\nture Notes in Computer Science, vol. 5904, pp. 128–143. Springer (2009).\nhttps://doi.org/10.1007/978-3-642-10672-9\n11\n64. Terauchi, T.: Checking race freedom via linear programming. In: Gupta, R., Ama-\nrasinghe, S.P. (eds.) Proceedings of the ACM SIGPLAN 2008 Conference on Pro-\ngramming Language Design and Implementation, Tucson, AZ, USA, June 7-13,\n2008. pp. 1–10. ACM (2008). https://doi.org/10.1145/1375581.1375583\n65. Toman, J., Pernsteiner, S., Torlak, E.:crust: A bounded verifier for Rust.\nIn: Cohen, M.B., Grunske, L., Whalen, M. (eds.) 30th IEEE/ACM Interna-\ntional Conference on Automated Software Engineering, ASE 2015, Lincoln,\nNE, USA, November 9-13, 2015. pp. 75–80. IEEE Computer Society (2015).\nhttps://doi.org/10.1109/ASE.2015.77\n66. Ullrich, S.: Electrolysis reference (2016),http://kha.github.io/electrolysis/\n67. Ullrich, S.: Simple Verification of Rust Programs via Functional Purification. Mas-\nter’s thesis, Karlsruhe Institute of Technology (2016)\n68. Vafeiadis, V.: Modular fine-grained concurrency verification. Ph.D. thesis, Univer-\nsity of Cambridge, UK (2008),http://ethos.bl.uk/OrderDetails.do?uin=uk.\nbl.ethos.612221\n69. Z3 Team: The Z3 theorem prover (2020),https://github.com/Z3Prover/z3\nOpen AccessThis chapter is licensed under the terms of the Creative Commons\nAttribution 4.0 International License (http://creativecommons.org/licenses/by/\n4.0/), which permits use, sharing, adaptation, distribution and reproduction in any\nmedium or format, as long as you give appropriate credit to the original author(s) and\nthe source, provide a link to the Creative Commons license and indicate if changes\nwere made.\nThe images or other third party material in this chapter are included in the chapter’s\nCreative Commons license, unless indicated otherwise in a credit line to the material. If\nmaterial is not included in the chapter’s Creative Commons license and your intended\nuse is not permitted by statutory regulation or exceeds the permitted use, you will need\nto obtain permission directly from the copyright holder.\n\n32Y. Matsushita et al.\nA Complementary Definitions on COR\nA.1 Complete Typing Rules for Instructions\nThe following is the complete rules for the typing judgment on instructions\nI:\nΠ,f\n(Γ,A)→(Γ\n′\n,A\n′\n). The variables on the right-hand side of one instruction\nshould be mutually distinct. The rules for subtypingT≤\nA\nUare explained later.\nα /∈A\nexΠ,f\nP=own,mut\nα\nfor anyβ∈Lifetime\nP T\n, α≤\nA\nβ\nlety=mutbor\nα\nx:\nΠ,f\n(Γ+{x:P T},A)→(Γ+{y:mut\nα\nT, x:\n†α\nP T},A)\nifTis of formownU, everyownandmut\nα\ninUis guarded by someimmut\nβ\ndropx:\nΠ,f\n(Γ+{x:T},A)→(Γ,A)\nimmutx:\nΠ,f\n(Γ+{x:mut\nα\nT},A)→(Γ+{x:immut\nα\nT},A)\nx:mut\nα\nT, y:P T∈ΓP=own,mut\nβ\nswap(∗x,∗y) :\nΠ,f\n(Γ,A)→(Γ,A)\nlet∗y=x:\nΠ,f\n(Γ+{x:T},A)→(Γ+{y:ownT},A)\nlety=∗x:\nΠ,f\n(Γ+{x:P P\n′\nT},A)→(Γ+{y: (P◦P\n′\n)T},A)\nP◦own=own◦P:=P R\nα\n◦R\n′\nβ\n:=R\n′′\nα\nwhereR\n′′\n=\n{\nmut(R=R\n′\n=mut)\nimmut(otherwise)\nx:P T∈ΓT:copy\nlet∗y=copy∗x:\nΠ,f\n(Γ,A)→(Γ+{y:ownT},A)\nint:copy unit:copy immut\nα\nT:copy\nT:copy\nμX.T:copy\nT\n0\n,T\n1\n:copy\nT\n0\n+T\n1\n:copy\nT\n0\n,T\n1\n:copy\nT\n0\n×T\n1\n:copy\nT≤\nA\nU\nxasU:\nΠ,f\n(Γ+{x:T},A)→(Γ+{x:U},A)\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|α\n′\na\n0\n≤α\n′\nb\n0\n,...,α\n′\na\nl−1\n≤α\n′\nb\nl−1\n〉(x\n′\n0\n:T\n′\n0\n,...,x\n′\nn−1\n:T\n′\nn−1\n)→T\n′\nn\nfor anyj∈[l], α\na\nj\n≤\nA\nα\nb\nj\nfor anyi∈[n+1], T\ni\n=T\n′\ni\n[α\n0\n/α\n′\n0\n,...,α\nm−1\n/α\n′\nm−1\n]\nlety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n) :\nΠ,f\n(Γ+{x\ni\n:T\ni\n|i∈[n]},A)→(Γ+{y:T\nn\n},A)\nΣ\nΠ,f\n: the function signature of the functionfinΠ\nintroα:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,({α}+A,{α}×({α}+A\nexΠ,f\n)+R)\n)\nα /∈A\nexΠ,f\nnowα:\nΠ,f\n(\nΓ,({α}+A, R)\n)\n→\n(\n{thaw\nα\n(x:\na\nT)|x:\na\nT∈Γ},(A,{(β,γ)∈R|β6=α})\n)\nthaw\nα\n(x:\na\nT) :=\n{\nx:T(a=†α)\nx:\na\nT(otherwise)\nα,β /∈A\nexΠ,f\nα≤β:\nΠ,f\n(\nΓ,(A,R)\n)\n→\n(\nΓ,(A,({(α,β)}∪R)\n+\n)\n)\nI=let∗y=const\nI:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nconst\n},A)\nT\nconst\n: the type ofconst(intorunit)\n\nRustHorn: CHC-based Verification for Rust Programs (full version)33\nx:Pint, x\n′\n:P\n′\nint∈Γ\nlet∗y=∗xop∗x\n′\n:\nΠ,f\n(Γ,A)→(Γ+{y:ownT\nop\n},A)\nT\nop\n: the output type ofop(intorbool)\nlet∗y=rand() :\nΠ,f\n(Γ,A)→(Γ+{y:own int},A)\nlet∗y=inj\nT\n0\n+T\n1\ni\n∗x:\nΠ,f\n(Γ+{x:ownT\ni\n},A)→(Γ+{y:own(T\n0\n+T\n1\n)},A)\nlet∗y= (∗x\n0\n,∗x\n1\n) :\nΠ,f\n(Γ+{x\n0\n:ownT\n0\n, x\n1\n:ownT\n1\n},A)→(Γ+{y:own(T\n0\n×T\n1\n)},A)\nlet(∗y\n0\n,∗y\n1\n) =∗x:\nΠ,f\n(Γ+{x:P(T\n0\n×T\n1\n)},A)→(Γ+{y\n0\n:P T\n0\n, y\n1\n:P T\n1\n},A)\nRule for Drop.The precondition for the typing rule ondropxis just for sim-\nplicity on formal definitions. For concrete operational semantics, a non-guarded\nownwithinownUcauses nested releases of memory cells. For translation to\nCHCs, a non-guardedmutwithinownUwould make value checks complicated.\nThis precondition does not weaken the expressivity, because we can divide\npointers by dereference (lety=∗x), pair destruction (let(∗y\n0\n,∗y\n1\n) =∗x) and\nvariant destruction (match∗x{···}) (possibly using loops/recursions, for recur-\nsive types).\nRule for Swap.We can omit swap between two owning pointers because it is\nessentially the same thing with just swapping the names of the pointers. Note\nthat an active (i.e. not frozen) owning pointer has no other alias at all.\nSubtyping.The subtyping judgmentΞ`T≤\nA\nUis defined as follows. Here,\nΞis a set of assumptions of formT≤U, which is used for subtyping on recursive\ntypes.∅`T≤\nA\nUcan be shortened intoT≤\nA\nU.\nT≤U∈Ξ\nΞ`T≤\nA\nU\nΞ`T≤\nA\nU\nΞ`\nˇ\nP T≤\nA\nˇ\nP U\nΞ`T≤\nA\nU, U≤\nA\nT\nΞ`mut\nα\nT≤\nA\nmut\nα\nU\nΞ`β≤\nA\nα\nΞ`R\nα\nT≤\nA\nR\nβ\nT\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n+T\n1\n≤\nA\nU\n0\n+U\n1\nΞ`T\n0\n≤\nA\nU\n0\n, T\n1\n≤\nA\nU\n1\nΞ`T\n0\n×T\n1\n≤\nA\nU\n0\n×U\n1\nΞ`μX.T≤\nA\nT[μX.T/X], T[μX.T/X]≤\nA\nμX.T\nX\n′\n,Y\n′\nare fresh inΞ Ξ+{X\n′\n≤Y\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y]\nΞ`μX.T≤\nA\nμY.U\nX\n′\n,Y\n′\nare fresh inΞ\nΞ+{X\n′\n≤Y\n′\n,Y\n′\n≤X\n′\n}`T[X\n′\n/X]≤\nA\nU[Y\n′\n/Y], U[Y\n′\n/Y]≤\nA\nT[X\n′\n/X]\nΞ`μX.T≤\nA\nμY.U, μY.U≤\nA\nμX.T\nΞ`T≤\nA\nT\nΞ`T≤\nA\nT\n′\n, T\n′\n≤\nA\nT\n′′\nΞ`T≤\nA\nT\n′′\n\n34Y. Matsushita et al.\nA.2 Complete Rules and an Example Execution for Concrete\nOperational Semantics\nThe following is the complete rules for the judgmentsC→\nΠ\nC\n′\nand final\nΠ\n(C).\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =ownT\n[f,L]F+{(x,a)};S|H+{(a+k,n\nk\n)|k∈[#T]} →\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nT\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =aF(y) =b\n[f,L]F;S|H+{(a+k,m\nk\n)|k∈[#T]}+{(b+k,n\nk\n)|k∈[#T]}\n→\nΠ\n[f,L\n′\n]F;S|H+{(a+k,n\nk\n)|k∈[#T]}+{(b+k,m\nk\n)|k∈[#T]}\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]F+{(x,a\n′\n)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,a\n′\n)}\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]F+{(x,a)};S|H+{(a,a\n′\n)} →\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =R\nα\nP TH(a) =a\n′\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P TF(x) =a\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b+k,H(a+k))|k∈[#T]}\nS\nΠ,f,L\n=I;gotoL\n′\nI=xasT,introα,nowα, α≤β\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F;S|H\nS\nΠ,f,L\n=lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)→U\n[f,L]F+{(x\ni\n,a\ni\n)|i∈[n]};S|H→\nΠ\n[g,entry]{(x\n′\ni\n,a\ni\n)|i∈[n]}; [f,L]y,F;S|H\nS\nΠ,f,L\n=returnx\n[f,L]{(x,a)}; [g,L\n′\n]x\n′\n,F\n′\n;S|H→\nΠ\n[g,L\n′\n]F\n′\n+{(x\n′\n,a)};S|H\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]{(x,a)}|H\n)\nS\nΠ,f,L\n=let∗y=const;gotoL\n′\nH\n′\n=\n{\n{(a,n)}(const=n)\n∅(const= ())\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+H\n′\nS\nΠ,f,L\n=let∗y=∗xop∗x\n′\n;gotoL\n′\nF(x) =aF(x\n′\n) =a\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,b)};S|H+{(b,H(a)〈op〉H(a\n′\n))}\n〈op〉:opas a binary operation on integers, withtrue/falseencoded as 1/0\nS\nΠ,f,L\n=let∗y=rand();gotoL\n′\n[f,L]F;S|H→\nΠ\n[f,L\n′\n]F+{(y,a)};S|H+{(a,n)}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)35\nS\nΠ,f,L\n=let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\nH\n0\n={(a\n′\n+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a+k,m\nk\n)|k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n,i)}+{(a\n′\n+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =own(T\n0\n+T\n1\n)i∈[2]H\n0\n={(a+1+#T\ni\n+k,0)|k∈[(#T\n1−i\n−#T\ni\n)\n≥0\n]}\n[f,L]F+{(x,a)};S|H+{(a,i)}+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}+H\n0\n→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H+{(a+1+k,m\nk\n)|k∈[#T\ni\n]}\nS\nΠ,f,L\n=match∗x{inj\n0\n∗y\n0\n→gotoL\n′\n0\n,inj\n1\n∗y\n1\n→gotoL\n′\n1\n}\nTy\nΠ,f,L\n(x) =R\nα\n(T\n0\n+T\n1\n)H(a) =i∈[2]\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\ni\n]F+{(y\ni\n,a+1)};S|H\nS\nΠ,f,L\n=let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\nfor eachi∈[2],Ty\nΠ,f,L\n(x\ni\n) =ownT\ni\n[f,L]F+{(x\n0\n,a\n0\n),(x\n1\n,a\n1\n)};S|H+{(a\ni\n+k,m\nik\n)|i∈[2],k∈[#T\ni\n]}\n→\nΠ\n[f,L\n′\n]F+{(y,a\n′\n)};S|H+{(a\n′\n+i#T\n0\n+k, m\nik\n)|i∈[2],k∈[#T\ni\n]}\nS\nΠ,f,L\n=let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =P(T\n0\n×T\n1\n)\n[f,L]F+{(x,a)};S|H→\nΠ\n[f,L\n′\n]F+{(y\n0\n,a),(y\n1\n,a+#T\n0\n)};S|H\nExample 5 (Execution on Concrete Operational Semantics).The following is an\nexample execution for the COR program of Example 1.♠,♥,♦,♣represent\nsome distinct addresses (e.g. 100,101,102,103).→\nΠ\nis abbreviated as→.\n[inc-max,entry]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L1]{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[inc-max,L3]{(ma,♠),(mb,♥),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,entry]{(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[take-max,L1]{(ord,♦),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→[take-max,L2]{(ou,♦+1),(ma,♠),(mb,♥)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→\n+\n[take-max,L4]{(ma,♠)};\n[inc-max,L4]mc,{(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L4]{(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3)}\n→[inc-max,L5]{(o1,♦),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♦,1)}\n→\n+\n[inc-max,L7]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,4),(♥,3),(♣,5)}\n→[inc-max,L8]{(oc\n′\n,♣),(mc,♠),(oa,♠),(ob,♥)}|{(♠,5),(♥,3),(♣,4)}\n→\n+\n[inc-max,L10]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→[inc-max,L11]{(oa,♠),(ob,♥)}|{(♠,5),(♥,3)}\n→\n+\n[inc-max,L14]{(ores,♦)}|{(♦,1)}\nThe execution is quite straightforward. Recall that every variable is a pointer\nand holds just an address. Most of the data is stored in the heap.\n\n36Y. Matsushita et al.\nB Complete Rules for Translation from Labeled\nStatements to CHCs\nWe present below the complete rules for (|L:S|)\nΠ,f\n.\n(|L:lety=mutbor\nα\nx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =ownT)\n{\n∀(∆\nΠ,f,L\n+{(x\n◦\n,(|T|))}).\nˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x,x\n◦\n〉/y,〈x\n◦\n,◦x〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:dropx;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n−{(x,mut(|T|))}+{(x\n∗\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:immutx;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n−{x,mut(|T|)}+{x\n∗\n,(|T|)}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[〈x\n∗\n〉/x]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\n(|L:swap(∗x,∗y);gotoL\n′\n|)\nΠ,f\n:=\n{\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x〉/y]}(Ty\nΠ,f,L\n(y) =ownT)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗y,◦x〉/x,〈∗x,◦y〉/y]\n}\n(Ty\nΠ,f,L\n(y) =mut\nα\nT)\n(|L:let∗y=x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈x〉/y]\n}\n(|L:lety=∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[∗x/y]\n}\n(Ty\nΠ,f,L\n(x) =ownP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x〉/y]\n}\n(Ty\nΠ,f,L\n(x) =immut\nα\nP T)\n{∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗∗x,∗◦x〉/y]}(Ty\nΠ,f,L\n(x) =mut\nα\nownT)\n{\n∀(∆\nΠ,f,L\n−{(x,mut box(|T|))}+{(x\n∗\n,box(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,x\n∗\n〉/x]⇐= ˇφ\nΠ,f,L\n′\n[x\n∗\n/y]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT)\n\n\n\n\n\n\n\n∀(∆\nΠ,f,L\n−{(x,mut mut(|T|))}\n+{(x\n∗\n,mut(|T|)),(x\n∗◦\n,(|T|))}).\nˇφ\nΠ,f,L\n[〈x\n∗\n,〈x\n∗◦\n,◦x\n∗\n〉〉/x]\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x\n∗\n,x\n∗◦\n〉/y]\n\n\n\n\n\n\n\n(Ty\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nT)\n(|L:let∗y=copy∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗x〉/y]\n}\n(|L:xasT;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:lety=g〈···〉(x\n0\n,...,x\nn−1\n);gotoL\n′\n|)\nΠ,f\n:={∀(∆\nΠ,f,L\n+{(y,(|Ty\nΠ,f,L\n′\n(y)|))}).ˇφ\nΠ,f,L\n⇐=g\nentry\n(x\n0\n,...,x\nn−1\n,y)∧ˇφ\nΠ,f,L\n′\n}\n(|L:returnx|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n[x/res]⇐=>\n}\n(|L:introα;gotoL\n′\n|)\nΠ,f\n= (|L:nowα;gotoL\n′\n|)\nΠ,f\n= (|L:α≤β;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=const;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈const〉/y]\n}\n\nRustHorn: CHC-based Verification for Rust Programs (full version)37\n(|L:let∗y=∗xop∗x\n′\n;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈∗xop∗x\n′\n〉/y]\n}\n(|L:let∗y=rand();gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n′\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n}\n(|L:let∗y=inj\nT\n0\n+T\n1\ni\n∗x;gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈inj\ni\n∗x〉/y]\n}\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =\nˇ\nP(T\n0\n+T\n1\n)\n(|L:match∗x{inj\n0\n∗y\n0\n→gotoL\n0\n,inj\n1\n∗y\n1\n→gotoL\n1\n}|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\ni\n).ˇφ\nΠ,f,L\n[〈inj\ni\n∗y\ni\n,inj\ni\n◦y\ni\n〉/x]⇐= ˇφ\nΠ,f,L\ni\n∣\n∣\ni∈[2]\n}\nif Ty\nΠ,f,L\n(x) =mut\nα\n(T\n0\n+T\n1\n)\n(|L:let∗y= (∗x\n0\n,∗x\n1\n);gotoL\n′\n|)\nΠ,f\n:=\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x\n0\n,∗x\n1\n)〉/y]\n}\n(|L:let(∗y\n0\n,∗y\n1\n) =∗x;gotoL\n′\n|)\nΠ,f\n:=\n\n\n\n\n\n\n\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐= ˇφ\nΠ,f,L\n′\n[〈(∗x).0〉/y\n0\n,〈(∗x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =\nˇ\nP T)\n{\n∀(∆\nΠ,f,L\n).ˇφ\nΠ,f,L\n⇐=\nˇφ\nΠ,f,L\n′\n[〈(∗x).0,(◦x).0〉/y\n0\n,〈(∗x).1,(◦x).1〉/y\n1\n]\n}\n(Ty\nΠ,f,L\n(x) =mut\nα\nT)\nRule for Dereference.The rule for dereference (lety=∗x) may seem com-\nplicated at a glance. It is however just because this single instruction can cause\nmultiple events (dereference, release of a mutable reference, and reborrow).\nC Proof of the Correctness of the CHC Representation\nC.1 Abstract Operational Semantics\nWe introduceabstract operation semanticsfor COR, as a mediator between\nconcrete operational semantics and the logic. In abstract operational semantics,\nwe get rid of heaps and directly represent each variable as a value with such\nfuture values expressed asabstract variablesx(marked bold and light blue),\nwhich is strongly related toprophecy variables. An abstract variable represents\nthe undetermined value of a mutable reference at the end of borrow.\nFormally, we introduce apre-value, which is defined as follows:\n(pre-value)ˆv,ˆw::=〈ˆv〉 | 〈ˆv\n∗\n,ˆv\n◦\n〉 |inj\ni\nˆv|(ˆv\n0\n,ˆv\n1\n)|const|x.\nAbstract operational semantics is described as transition on program states\nencoded as anabstract configurationC, which is defined as follows. Here, an\nabstract stack frameFmaps variables to pre-values. We may omit the terminator\n‘; end’.\nS::= end\n∣\n∣\n[f,L]\nΘ\nx,F;S(abstract configuration)C::= [f,L]\nΘ\nF;S |\nA\nIn order to facilitate proofs later, we append lifetime-related ghost informa-\ntion toC, which does not directly affect the execution.Ais aglobal lifetime\n\n38Y. Matsushita et al.\ncontext, which is the lifetime context of all local lifetime variables from all con-\ncrete stack frames; we add atagon a local lifetime variable (e.g.α\n(i)\ninstead of\nα) to clarify which stack frame it belongs to.Θis alifetime parameter context,\nwhich maps the lifetime variables in the (local) lifetime context for a stack frame\nto the correspondingtaggedlifetime variables in the global lifetime context.\nJust as concrete operational semantics, abstract operational semantics is\ncharacterized by the one-step transition relationC →\nΠ\nC\n′\nand the termina-\ntion relation final\nΠ\n(C), which are defined by the following rules.C[ˆv/x] isCwith\neveryxin its abstract stack frames replaced with ˆv. ‘val’ maps both〈ˆv〉and\n〈ˆv,x\n◦\n〉to ˆv.\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=lety=mutbor\nα\nx;gotoL\n′\nx\n◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n′\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗\n,x\n◦\n〉),(x,〈x\n◦\n,x\n′\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =\nˇ\nP T\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=dropx;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF;S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=immutx;gotoL\n′\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n)[\nˆv\n∗\n/x\n◦\n]\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =ownT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n〉)};S |\nA\nS\nΠ,f,L\n=swap(∗x,∗y);gotoL\n′\nTy\nΠ,f,L\n(y) =mut\nα\nT\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n,x\n◦\n〉),(y,〈ˆw\n∗\n,y\n◦\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(x,〈ˆw\n∗\n,x\n◦\n〉),(y,〈ˆv\n∗\n,y\n◦\n〉)};S |\nA\nS\nΠ,f,L\n=let∗y=x;gotoL\n′\n[f,L]\nΘ\nF+{(x,ˆv)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =ownP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,ˆv\n∗\n)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =immut\nα\nP T\n[f,L]\nΘ\nF+{(x,〈ˆv\n∗\n〉)};S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(ˆv\n∗\n)〉)};S |\nA\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nownTx\n◦∗\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n◦∗\n〉)};S |\nA\n)[\n〈x\n◦∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nimmut\nβ\nT\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n〉)};S |\nA\n)[\n〈ˆv\n∗∗\n〉/x\n◦\n]\nS\nΠ,f,L\n=lety=∗x;gotoL\n′\nTy\nΠ,f,L\n(x) =mut\nα\nmut\nβ\nTx\n∗◦\nis fresh\n[f,L]\nΘ\nF+{(x,〈〈ˆv\n∗∗\n,x\n′\n∗◦\n〉,x\n◦\n〉)};S |\nA\n→\nΠ\n(\n[f,L\n′\n]\nΘ\nF+{(y,〈ˆv\n∗∗\n,x\n∗◦\n〉)};S |\nA\n)[\n〈x\n∗◦\n,x\n′\n∗◦\n〉/x\n◦\n]\n\nRustHorn: CHC-based Verification for Rust Programs (full version)39\nS\nΠ,f,L\n=let∗y=copy∗x;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF+{(y,〈val(F(x))〉)};S |\nA\nS\nΠ,f,L\n=xasT;gotoL\n′\n[f,L]\nΘ\nF;S |\nA\n→\nΠ\n[f,L\n′\n]\nΘ\nF;S |\nA\nS\nΠ,f,L\n=lety=g〈α\n0\n,...,α\nm−1\n〉(x\n0\n,...,x\nn−1\n);gotoL\n′\nΣ\nΠ,g\n=〈α\n′\n0\n,...,α\n′\nm−1\n|···〉(x\n′\n0\n:T\n0\n,...,x\n′\nn−1\n:T\nn−1\n)Θ\n′\n={(α\n′\nj\n,α\nj\nΘ)|j∈[m]}\n[f,L]\nΘ\nF+{(x\ni\n,ˆv\ni\n)|i∈[n]};S |\nA\n→\nΠ\n[g,entry]\nΘ\n′\n{(x\n′\ni\n,ˆv\ni\n)|i∈[n]}; [f,L\n′\n]\nΘ\ny,F;S |\nA\nS\nΠ,f,L\n=returnx\n[f,L]\nΘ\n{(x,ˆv)}; [g,L\n′\n]\nΘ\n′\nx\n′\n,F\n′\n;S |\nA\n→\nΠ\n[g,L\n′\n]\nΘ\n′\nF\n′\n+{(x\n′\n,ˆv)};S |\nA\nS\nΠ,f,L\n=returnx\nfinal\nΠ\n(\n[f,L]\nΘ\n{(x,ˆv)}|\nA\n)\nS\nΠ,f,L\n=introα;gotoL\n′\nShasnlayersA\nex\n={α\n(k)\n∈A|k ${workdir}/gen_DB.sh + echo "npm install -g @a_kawashiro/jendeley@${version}" >> ${workdir}/gen_DB.sh + echo "jendeley scan --papers_dir /workdir/test_pdfs --book_dirs /workdir/test_pdfs/dummyTapl" >> ${workdir}/gen_DB.sh + docker run --volume /${workdir}:/workdir --rm node:21 bash /workdir/gen_DB.sh && cp ${workdir}/test_pdfs/jendeley_db.json ${rootdir}/jendeley-backend/generated_DBs/jendeley_db_${version}.json & + process_id=$! + process_list+=(${process_id}) +done + +for p in "${process_list[@]}"; do + wait ${p} +done diff --git a/jendeley-backend/src/update_db.ts b/jendeley-backend/src/update_db.ts index d8cec62..25e0691 100644 --- a/jendeley-backend/src/update_db.ts +++ b/jendeley-backend/src/update_db.ts @@ -7,7 +7,12 @@ import fetch from "node-fetch"; import { Request as NFRequest } from "node-fetch"; import { Either, genLeft, genRight } from "./either"; import { validateJsonDB } from "./validate_db"; -import { ENTRY_AUTHORS, ENTRY_TEXT, ENTRY_TITLE } from "./constants"; +import { + ENTRY_AUTHORS, + ENTRY_TEXT, + DB_META_KEY, + JENDELEY_VERSION, +} from "./constants"; async function getTextsFromPDF( pdfFullpath: string, @@ -79,6 +84,7 @@ async function update_db(dbPathVer1: string[], dbPathVer2: string[]) { jsonDB[id][ENTRY_AUTHORS] = []; } } + jsonDB[DB_META_KEY]["version"] = JENDELEY_VERSION; if (fs.existsSync(concatDirs(dbPathVer2))) { logger.fatal(dbPathVer2 + " already exists."); diff --git a/run-CI.sh b/run-CI.sh index e35b487..75aa47b 100755 --- a/run-CI.sh +++ b/run-CI.sh @@ -1,5 +1,5 @@ #! /bin/bash -eux -docker build . -f ./Dockerfile -docker build . -f ./Releasable.Dockerfile -docker build . -f ./shellcheck.Dockerfile +docker build . -f ./Dockerfile --network=host +docker build . -f ./Releasable.Dockerfile --network=host +docker build . -f ./shellcheck.Dockerfile --network=host