Skip to content

Commit

Permalink
Add a columnToList method as that's commonly needed
Browse files Browse the repository at this point in the history
  • Loading branch information
MrPowers committed Jul 22, 2018
1 parent 1c9442f commit fd371a1
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,41 @@ object DataFrameHelpers extends DataFrameValidator {
df.select(colName).collect().map(r => r(0).asInstanceOf[T])
}

/**
 * Collects the values of a single DataFrame column into a `List`.
 * '''N.B. This method uses `collect` and should only be called on small DataFrames.'''
 *
 * The work is delegated to `columnToArray`; the resulting `Array` is then
 * converted to an immutable `List`.
 *
 * Given this `sourceDF`:
 *
 * {{{
 * +---+
 * |num|
 * +---+
 * |  1|
 * |  2|
 * |  3|
 * +---+
 * }}}
 *
 * the `num` column can be turned into a List like so:
 *
 * {{{
 * val actual = DataFrameHelpers.columnToList[Int](sourceDF, "num")
 *
 * println(actual)
 *
 * // List(1, 2, 3)
 * }}}
 *
 * @tparam T element type of the returned list
 * @param df the DataFrame to read the column from
 * @param colName name of the column whose values are returned
 * @return the values produced by `columnToArray` for `colName`, as a `List[T]`
 */
def columnToList[T: ClassTag](
  df: DataFrame,
  colName: String
): List[T] = {
  // Reuse the Array-based helper so both methods share one extraction path.
  val columnValues: Array[T] = columnToArray[T](df, colName)
  columnValues.toList
}

/**
* Converts a DataFrame to an Array of Maps
* '''N.B. This method uses `collect` and should only be called on small DataFrames.'''
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,28 @@ object DataFrameHelpersTest

}

'columnToList - {

  "converts a column to a list" - {

    // Single nullable integer column named "num" holding three rows.
    val rows = List(1, 2, 3)
    val schema = List(("num", IntegerType, true))
    val sourceDF = spark.createDF(rows, schema)

    val collected = DataFrameHelpers.columnToList[Int](sourceDF, "num")

    // The expected value is spelled out literally so the assertion stays
    // independent of the fixture definition above.
    collected ==> List(1, 2, 3)

  }

}

'toArrayOfMaps - {

"converts a DataFrame into an array of maps" - {
Expand Down

0 comments on commit fd371a1

Please sign in to comment.