diff --git a/core/src/main/scala/doric/syntax/BinaryColumns.scala b/core/src/main/scala/doric/syntax/BinaryColumns.scala index f6e64633d..bb5440c25 100644 --- a/core/src/main/scala/doric/syntax/BinaryColumns.scala +++ b/core/src/main/scala/doric/syntax/BinaryColumns.scala @@ -19,6 +19,7 @@ private[syntax] trait BinaryColumns { * as a 32 character hex string. * * @group Binary Type + * @see [[org.apache.spark.sql.functions.md5]] */ def md5: StringColumn = column.elem.map(f.md5).toDC @@ -27,6 +28,7 @@ private[syntax] trait BinaryColumns { * as a 40 character hex string. * * @group Binary Type + * @see [[org.apache.spark.sql.functions.sha1]] */ def sha1: StringColumn = column.elem.map(f.sha1).toDC @@ -36,6 +38,7 @@ private[syntax] trait BinaryColumns { * * @throws java.lang.IllegalArgumentException if numBits is not in the permitted values * @group Binary Type + * @see [[org.apache.spark.sql.functions.sha2]] */ def sha2(numBits: Int): StringColumn = column.elem.map(x => f.sha2(x, numBits)).toDC @@ -45,6 +48,7 @@ private[syntax] trait BinaryColumns { * returns the value as a long column. * * @group Binary Type + * @see [[org.apache.spark.sql.functions.crc32]] */ def crc32: LongColumn = column.elem.map(f.crc32).toDC @@ -53,6 +57,7 @@ private[syntax] trait BinaryColumns { * This is the reverse of unbase64. * * @group Binary Type + * @see [[org.apache.spark.sql.functions.base64]] */ def base64: StringColumn = column.elem.map(f.base64).toDC @@ -62,6 +67,7 @@ private[syntax] trait BinaryColumns { * If either argument is null, the result will also be null. 
* * @group Binary Type + * @see [[org.apache.spark.sql.functions.decode]] */ def decode(charset: StringColumn): StringColumn = (column.elem, charset.elem) diff --git a/core/src/main/scala/doric/syntax/BooleanColumns.scala b/core/src/main/scala/doric/syntax/BooleanColumns.scala index 51c0814b9..e9c683661 100644 --- a/core/src/main/scala/doric/syntax/BooleanColumns.scala +++ b/core/src/main/scala/doric/syntax/BooleanColumns.scala @@ -67,6 +67,7 @@ private[syntax] trait BooleanColumns { * * @throws java.lang.RuntimeException if the condition is false * @group Boolean Type + * @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.assert_true]] */ def assertTrue: NullColumn = column.elem.map(f.assert_true).toDC @@ -75,6 +76,7 @@ private[syntax] trait BooleanColumns { * * @throws java.lang.RuntimeException if the condition is false * @group Boolean Type + * @see [[org.apache.spark.sql.functions.assert_true(c:org\.apache\.spark\.sql\.Column,e:* org.apache.spark.sql.functions.assert_true]] */ def assertTrue(msg: StringColumn): NullColumn = (column.elem, msg.elem).mapN(f.assert_true).toDC diff --git a/core/src/main/scala/doric/syntax/CommonColumns.scala b/core/src/main/scala/doric/syntax/CommonColumns.scala index 887e2a266..89fa1b824 100644 --- a/core/src/main/scala/doric/syntax/CommonColumns.scala +++ b/core/src/main/scala/doric/syntax/CommonColumns.scala @@ -31,6 +31,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] { * the DoricColumns to coalesce * @return * the first column that is not null, or null if all inputs are null. + * @see [[org.apache.spark.sql.functions.coalesce]] */ def coalesce[T](cols: DoricColumn[T]*): DoricColumn[T] = cols.map(_.elem).toList.sequence.map(f.coalesce(_: _*)).toDC @@ -39,6 +40,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] { * Calculates the hash code of given columns, and returns the result as an integer column. 
* * @group All Types + * @see [[org.apache.spark.sql.functions.hash]] */ def hash(cols: DoricColumn[_]*): IntegerColumn = cols.map(_.elem).toList.sequence.map(f.hash(_: _*)).toDC @@ -48,6 +50,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] { * variant of the xxHash algorithm, and returns the result as a long column. * * @group All Types + * @see [[org.apache.spark.sql.functions.xxhash64]] */ def xxhash64(cols: DoricColumn[_]*): LongColumn = cols.map(_.elem).toList.sequence.map(f.xxhash64(_: _*)).toDC @@ -181,6 +184,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] { * literals to compare to * @return * Boolean DoricColumn with the comparation logic. + * @see [[org.apache.spark.sql.Column.isin]] */ def isIn(elems: T*): BooleanColumn = column.elem.map(_.isin(elems: _*)).toDC @@ -189,6 +193,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] { * @group All Types * @return * Boolean DoricColumn + * @see [[org.apache.spark.sql.Column.isNull]] */ def isNull: BooleanColumn = column.elem.map(_.isNull).toDC @@ -197,6 +202,7 @@ private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] { * @group All Types * @return * Boolean DoricColumn + * @see [[org.apache.spark.sql.Column.isNotNull]] */ def isNotNull: BooleanColumn = column.elem.map(_.isNotNull).toDC diff --git a/core/src/main/scala/doric/syntax/DateColumns.scala b/core/src/main/scala/doric/syntax/DateColumns.scala index 4b21001ff..c30e43889 100644 --- a/core/src/main/scala/doric/syntax/DateColumns.scala +++ b/core/src/main/scala/doric/syntax/DateColumns.scala @@ -15,6 +15,7 @@ private[syntax] trait DateColumns { * All calls of current_date within the same query return the same value. 
* * @group Date Type + * @see [[org.apache.spark.sql.functions.current_date]] */ def currentDate(): DateColumn = f.current_date().asDoric[Date] @@ -32,6 +33,7 @@ private[syntax] trait DateColumns { * Date column after adding months * @note * Timestamp columns will be truncated to Date column + * @see [[org.apache.spark.sql.functions.add_months(startDate:org\.apache\.spark\.sql\.Column,numMonths:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.add_months]] */ def addMonths(nMonths: IntegerColumn): DateColumn = (column.elem, nMonths.elem).mapN(f.add_months).toDC @@ -44,6 +46,7 @@ private[syntax] trait DateColumns { * @note * Timestamp columns will be truncated to Date column * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.date_add(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_add]] */ def addDays(days: IntegerColumn): DateColumn = (column.elem, days.elem).mapN(f.date_add).toDC @@ -59,6 +62,7 @@ private[syntax] trait DateColumns { * Use specialized functions like 'year' whenever possible as they benefit from a * specialized implementation. 
* @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.date_format]] */ def format(format: StringColumn): StringColumn = (column.elem, format.elem) @@ -75,6 +79,7 @@ private[syntax] trait DateColumns { * @note * Timestamp columns will be truncated to Date column * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.date_sub(start:org\.apache\.spark\.sql\.Column,days:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.date_sub]] */ def subDays(days: IntegerColumn): DateColumn = (column.elem, days.elem).mapN(f.date_sub).toDC @@ -85,6 +90,7 @@ private[syntax] trait DateColumns { * @param dateCol * A Date or Timestamp column * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.datediff]] */ def diff(dateCol: DoricColumn[T]): IntegerColumn = (column.elem, dateCol.elem) @@ -95,6 +101,7 @@ private[syntax] trait DateColumns { * Extracts the day of the month as an integer from a given date. * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.dayofmonth]] */ def dayOfMonth: IntegerColumn = column.elem.map(f.dayofmonth).toDC @@ -103,6 +110,7 @@ private[syntax] trait DateColumns { * Ranges from 1 for a Sunday through to 7 for a Saturday * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.dayofweek]] */ def dayOfWeek: IntegerColumn = column.elem.map(f.dayofweek).toDC @@ -110,6 +118,7 @@ private[syntax] trait DateColumns { * Extracts the day of the year as an integer from a given date. * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.dayofyear]] */ def dayOfYear: IntegerColumn = column.elem.map(f.dayofyear).toDC @@ -117,6 +126,7 @@ private[syntax] trait DateColumns { * Sets the moment to the last day of the same month. * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.last_day]] */ def endOfMonth: DateColumn = lastDayOfMonth @@ -126,6 +136,7 @@ private[syntax] trait DateColumns { * month in July 2015. 
* * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.last_day]] */ def lastDayOfMonth: DateColumn = column.elem.map(f.last_day).toDC @@ -133,6 +144,7 @@ private[syntax] trait DateColumns { * Extracts the month as an integer from a given date. * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.month]] */ def month: IntegerColumn = column.elem.map(f.month).toDC @@ -143,7 +155,7 @@ private[syntax] trait DateColumns { * of their respective months. Otherwise, the difference is calculated assuming 31 days per month. * * For example: - * {{{ + * @example {{{ * Date("2017-11-14").monthsBetween(Date("2017-07-14")) // returns 4.0 * Date("2017-01-01").monthsBetween(Date("2017-01-10")) // returns 0.29032258 * Timestamp("2017-06-01 00:00:00").monthsBetween(Timestamp("2017-06-16 12:00:00")) // returns -0.5 @@ -152,6 +164,7 @@ private[syntax] trait DateColumns { * @param dateCol * Date or Timestamp column * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.months_between]] */ def monthsBetween(dateCol: DoricColumn[T]): DoubleColumn = (column.elem, dateCol.elem).mapN(f.months_between).toDC @@ -165,6 +178,7 @@ private[syntax] trait DateColumns { * If `roundOff` is set to true, the result is rounded off to 8 digits; * it is not rounded otherwise. * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.months_between(end:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column,roundOff:* org.apache.spark.sql.functions.months_between]] */ def monthsBetween( dateCol: DoricColumn[T], @@ -180,14 +194,15 @@ private[syntax] trait DateColumns { * Returns the first date which is later than the value of the `date` column that is on the * specified day of the week. 
* - * For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02") because - * that is the first Sunday after 2015-07-27. + * @example `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02") + * because that is the first Sunday after 2015-07-27. * * @param dayOfWeek * Case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" * @note * Timestamp columns will be truncated to Date column * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.next_day]] */ def nextDay(dayOfWeek: StringColumn): DateColumn = (column.elem, dayOfWeek.elem) @@ -200,28 +215,30 @@ private[syntax] trait DateColumns { * Extracts the quarter as an integer from a given date. * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.quarter]] */ def quarter: IntegerColumn = column.elem.map(f.quarter).toDC /** * Returns date truncated to the unit specified by the format. * - * For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01") + * @example `Timestamp("2018-11-19 12:01:19").truncate("year")` returns Date("2018-01-01") * * @param format - * if date: - * * 'year', 'yyyy', 'yy' to truncate by year, - * * 'month', 'mon', 'mm' to truncate by month - * Other options are: 'week', 'quarter' - * if timestamp: - * * 'year', 'yyyy', 'yy' to truncate by year, - * * 'month', 'mon', 'mm' to truncate by month, - * * 'day', 'dd' to truncate by day, - * Other options are: - * * 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter' + * - if date: + * - 'year', 'yyyy', 'yy' to truncate by year, + * - 'month', 'mon', 'mm' to truncate by month + * - __Other options are__: 'week', 'quarter' + * - if timestamp: + * - 'year', 'yyyy', 'yy' to truncate by year, + * - 'month', 'mon', 'mm' to truncate by month, + * - 'day', 'dd' to truncate by day, + * - __Other options are__: 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter' * @note * 
Timestamp columns will be truncated to Date column * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.trunc]] + * @see [[org.apache.spark.sql.functions.date_trunc]] */ def truncate(format: StringColumn): DoricColumn[T] = (column.elem, format.elem) @@ -243,6 +260,7 @@ private[syntax] trait DateColumns { * A long * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.unix_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.unix_timestamp]] */ def unixTimestamp: LongColumn = column.elem.map(f.unix_timestamp).toDC @@ -253,6 +271,7 @@ private[syntax] trait DateColumns { * as defined by ISO 8601 * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.weekofyear]] */ def weekOfYear: IntegerColumn = column.elem.map(f.weekofyear).toDC @@ -260,6 +279,7 @@ private[syntax] trait DateColumns { * Extracts the year as an integer from a given date. * * @group Date & Timestamp Type + * @see [[org.apache.spark.sql.functions.year]] */ def year: IntegerColumn = column.elem.map(f.year).toDC @@ -267,6 +287,7 @@ private[syntax] trait DateColumns { * Transform date to timestamp * * @group Date Type + * @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]] */ def toTimestamp: TimestampColumn = column.elem.map(f.to_timestamp).toDC @@ -274,6 +295,7 @@ private[syntax] trait DateColumns { * Transform date to Instant * * @group Date Type + * @see [[org.apache.spark.sql.functions.to_timestamp(s:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.to_timestamp]] */ def toInstant: InstantColumn = column.elem.map(f.to_timestamp).toDC } diff --git a/core/src/main/scala/doric/syntax/StringColumns.scala b/core/src/main/scala/doric/syntax/StringColumns.scala index ab6d98004..c29a472ee 100644 --- a/core/src/main/scala/doric/syntax/StringColumns.scala +++ b/core/src/main/scala/doric/syntax/StringColumns.scala @@ -17,6 +17,7 @@ 
private[syntax] trait StringColumns { * @return * a reference of a single DoricColumn with all strings concatenated. If at * least one is null will return null. + * @see [[org.apache.spark.sql.functions.concat]] */ def concat(cols: StringColumn*): StringColumn = cols.map(_.elem).toList.sequence.map(f.concat(_: _*)).toDC @@ -63,6 +64,7 @@ private[syntax] trait StringColumns { * @return * Formats the arguments in printf-style and returns the result as a string * column. + * @see [[org.apache.spark.sql.functions.format_string]] */ def formatString( format: StringColumn, @@ -109,6 +111,7 @@ private[syntax] trait StringColumns { * and returns the result as an int column. * * @group String Type + * @see [[org.apache.spark.sql.functions.ascii]] */ def ascii: IntegerColumn = s.elem.map(f.ascii).toDC @@ -116,11 +119,12 @@ private[syntax] trait StringColumns { * Returns a new string column by converting the first letter of each word * to uppercase. Words are delimited by whitespace. * - * For example, "hello world" will become "Hello World". + * @example "hello world" will become "Hello World". * * @group String Type + * @see [[org.apache.spark.sql.functions.initcap]] */ - def initcap: StringColumn = s.elem.map(f.initcap).toDC + def initCap: StringColumn = s.elem.map(f.initcap).toDC /** * Locate the position of the first occurrence of substr column in the @@ -130,8 +134,9 @@ private[syntax] trait StringColumns { * @note * The position is not zero based, but 1 based index. Returns 0 if substr * could not be found in str. + * @see [[org.apache.spark.sql.functions.instr]] */ - def instr(substring: StringColumn): IntegerColumn = + def inStr(substring: StringColumn): IntegerColumn = (s.elem, substring.elem) .mapN((str, substr) => { new Column(StringInstr(str.expr, substr.expr)) @@ -144,6 +149,7 @@ private[syntax] trait StringColumns { * spaces. The length of binary strings includes binary zeros. 
* * @group String Type + * @see [[org.apache.spark.sql.functions.length]] */ def length: IntegerColumn = s.elem.map(f.length).toDC @@ -151,6 +157,7 @@ private[syntax] trait StringColumns { * Computes the Levenshtein distance of the two given string columns. * * @group String Type + * @see [[org.apache.spark.sql.functions.levenshtein]] */ def levenshtein(dc: StringColumn): IntegerColumn = (s.elem, dc.elem).mapN(f.levenshtein).toDC @@ -163,6 +170,8 @@ private[syntax] trait StringColumns { * @note * The position is not zero based, but 1 based index. returns 0 if substr * could not be found in str. + * @see org.apache.spark.sql.functions.locate + * @todo scaladoc link (issue #135) */ def locate( substr: StringColumn, @@ -178,6 +187,7 @@ private[syntax] trait StringColumns { * Converts a string column to lower case. * * @group String Type + * @see [[org.apache.spark.sql.functions.lower]] */ def lower: StringColumn = s.elem.map(f.lower).toDC @@ -187,6 +197,7 @@ private[syntax] trait StringColumns { * characters. * * @group String Type + * @see [[org.apache.spark.sql.functions.lpad]] */ def lpad(len: IntegerColumn, pad: StringColumn): StringColumn = (s.elem, len.elem, pad.elem) @@ -199,6 +210,7 @@ private[syntax] trait StringColumns { * Trim the spaces from left end for the specified string value. * * @group String Type + * @see [[org.apache.spark.sql.functions.ltrim(e:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.ltrim]] */ def ltrim: StringColumn = s.elem.map(f.ltrim).toDC @@ -207,6 +219,7 @@ private[syntax] trait StringColumns { * string column. * * @group String Type + * @see [[org.apache.spark.sql.functions.ltrim(e:org\.apache\.spark\.sql\.Column,trimString:* org.apache.spark.sql.functions.ltrim]] */ def ltrim(trimString: StringColumn): StringColumn = (s.elem, trimString.elem) @@ -220,6 +233,7 @@ private[syntax] trait StringColumns { * byte position `pos` of `src` and proceeding for `len` bytes. 
* * @group String Type + * @see [[org.apache.spark.sql.functions.overlay(src:org\.apache\.spark\.sql\.Column,replace:org\.apache\.spark\.sql\.Column,pos:org\.apache\.spark\.sql\.Column,len:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.overlay]] */ def overlay( replace: StringColumn, @@ -237,6 +251,7 @@ private[syntax] trait StringColumns { * * @throws java.lang.IllegalArgumentException if the specified group index exceeds the group count of regex * @group String Type + * @see [[org.apache.spark.sql.functions.regexp_extract]] */ def regexpExtract( exp: StringColumn, @@ -253,6 +268,7 @@ private[syntax] trait StringColumns { * with replacement. * * @group String Type + * @see [[org.apache.spark.sql.functions.regexp_replace(e:org\.apache\.spark\.sql\.Column,pattern:org\.apache\.spark\.sql\.Column,* org.apache.spark.sql.functions.regexp_replace]] */ def regexpReplace( pattern: StringColumn, @@ -264,6 +280,7 @@ private[syntax] trait StringColumns { * Repeats a string column n times, and returns it as a new string column. * * @group String Type + * @see [[org.apache.spark.sql.functions.repeat]] */ def repeat(n: IntegerColumn): StringColumn = (s.elem, n.elem) .mapN((str, times) => new Column(StringRepeat(str.expr, times.expr))) @@ -275,6 +292,7 @@ private[syntax] trait StringColumns { * characters. * * @group String Type + * @see [[org.apache.spark.sql.functions.rpad]] */ def rpad(len: IntegerColumn, pad: StringColumn): StringColumn = (s.elem, len.elem, pad.elem) @@ -285,6 +303,7 @@ private[syntax] trait StringColumns { * Trim the spaces from right end for the specified string value. * * @group String Type + * @see [[org.apache.spark.sql.functions.rtrim(e:org\.apache\.spark\.sql\.Column):* org.apache.spark.sql.functions.rtrim]] */ def rtrim: StringColumn = s.elem.map(f.rtrim).toDC @@ -293,6 +312,7 @@ private[syntax] trait StringColumns { * string column. 
* * @group String Type + * @see [[org.apache.spark.sql.functions.rtrim(e:org\.apache\.spark\.sql\.Column,trimString:* org.apache.spark.sql.functions.rtrim]] */ def rtrim(trimString: StringColumn): StringColumn = (s.elem, trimString.elem) @@ -303,21 +323,10 @@ private[syntax] trait StringColumns { * Returns the soundex code for the specified expression. * * @group String Type + * @see [[org.apache.spark.sql.functions.soundex]] */ def soundex: StringColumn = s.elem.map(f.soundex).toDC - /** - * Splits str around matches of the given pattern. - * - * @param pattern - * a string representing a regular expression. The regex string should be - * a Java regular expression. - * - * @group String Type - */ - def split(pattern: StringColumn): ArrayColumn[String] = - split(pattern, (-1).lit) - /** * Splits str around matches of the given pattern. * @@ -326,16 +335,18 @@ private[syntax] trait StringColumns { * a string representing a regular expression. The regex string should be * a Java regular expression. * @param limit - * an integer expression which controls the number of times the regex is - * applied.