From 0b0776c65158db3b07590479d54e10727f2d140b Mon Sep 17 00:00:00 2001
From: Jolan Rensen <jolan.rensen@jetbrains.com>
Date: Fri, 1 Nov 2024 12:39:39 +0100
Subject: [PATCH] added OOM message pointing to new csv implementation

---
 .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 99 ++++++++++---------
 1 file changed, 54 insertions(+), 45 deletions(-)
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt
index 605f9a3bf..a22c60ce0 100644
--- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt
@@ -351,61 +351,70 @@ public fun DataFrame.Companion.readDelim(
     readLines: Int? = null,
     parserOptions: ParserOptions? = null,
 ): AnyFrame {
-    var reader = reader
-    if (skipLines > 0) {
-        reader = BufferedReader(reader)
-        repeat(skipLines) { reader.readLine() }
-    }
-
-    val csvParser = format.parse(reader)
-    val records = if (readLines == null) {
-        csvParser.records
-    } else {
-        require(readLines >= 0) { "`readLines` must not be negative" }
-        val records = ArrayList<CSVRecord>(readLines)
-        val iter = csvParser.iterator()
-        var count = readLines ?: 0
-        while (iter.hasNext() && 0 < count--) {
-            records.add(iter.next())
+    try {
+        var reader = reader
+        if (skipLines > 0) {
+            reader = BufferedReader(reader)
+            repeat(skipLines) { reader.readLine() }
         }
-        records
-    }
-
-    val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
-        ?: (1..(records.firstOrNull()?.count() ?: 0)).map { index -> "X$index" }
 
-    val generator = ColumnNameGenerator()
-    val uniqueNames = columnNames.map { generator.addUnique(it) }
+        val csvParser = format.parse(reader)
+        val records = if (readLines == null) {
+            csvParser.records
+        } else {
+            require(readLines >= 0) { "`readLines` must not be negative" }
+            val records = ArrayList<CSVRecord>(readLines)
+            val iter = csvParser.iterator()
+            var count = readLines ?: 0
+            while (iter.hasNext() && 0 < count--) {
+                records.add(iter.next())
+            }
+            records
+        }
 
-    val cols = uniqueNames.mapIndexed { colIndex, colName ->
-        val defaultColType = colTypes[".default"]
-        val colType = colTypes[colName] ?: defaultColType
-        var hasNulls = false
-        val values = records.map {
-            if (it.isSet(colIndex)) {
-                it[colIndex].ifEmpty {
+        val columnNames = csvParser.headerNames.takeIf { it.isNotEmpty() }
+            ?: (1..(records.firstOrNull()?.count() ?: 0)).map { index -> "X$index" }
+
+        val generator = ColumnNameGenerator()
+        val uniqueNames = columnNames.map { generator.addUnique(it) }
+
+        val cols = uniqueNames.mapIndexed { colIndex, colName ->
+            val defaultColType = colTypes[".default"]
+            val colType = colTypes[colName] ?: defaultColType
+            var hasNulls = false
+            val values = records.map {
+                if (it.isSet(colIndex)) {
+                    it[colIndex].ifEmpty {
+                        hasNulls = true
+                        null
+                    }
+                } else {
                     hasNulls = true
                     null
                 }
-            } else {
-                hasNulls = true
-                null
             }
-        }
-        val column = DataColumn.createValueColumn(colName, values, typeOf<String>().withNullability(hasNulls))
-        when (colType) {
-            null -> column.tryParse(parserOptions)
-
-            else -> {
-                column.tryParse(
-                    (parserOptions ?: ParserOptions()).copy(
-                        skipTypes = ParserOptions.allTypesExcept(colType.toKType()),
-                    ),
-                )
+            val column = DataColumn.createValueColumn(colName, values, typeOf<String>().withNullability(hasNulls))
+            when (colType) {
+                null -> column.tryParse(parserOptions)
+
+                else -> {
+                    column.tryParse(
+                        (parserOptions ?: ParserOptions()).copy(
+                            skipTypes = ParserOptions.allTypesExcept(colType.toKType()),
+                        ),
+                    )
+                }
             }
         }
+        return cols.toDataFrame()
+    } catch (e: OutOfMemoryError) {
+        throw OutOfMemoryError(
+            "Ran out of memory reading this CSV-like file. " +
+                "You can try our new experimental CSV reader by adding the dependency " +
+                "\"org.jetbrains.kotlinx:dataframe-csv:{VERSION}\" and using `DataFrame.readCsv()` instead of " +
+                "`DataFrame.readCSV()`. This requires `@OptIn(ExperimentalCsv::class)`.",
+        )
     }
-    return cols.toDataFrame()
 }
 
 public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT): Unit =