Patch for currentdate.py: replace the single-column StructType sample with a
small DataFrame of date strings and add a "date" column parsed with
to_date("date_str", "M/d/yyyy").

NOTE(review): line breaks, blank context lines and the indentation of the
removed StructField line were reconstructed from the hunk counts (21 old /
26 new lines); verify against the repository before applying.

diff --git a/currentdate.py b/currentdate.py
index 6995fb3..2a5a421 100644
--- a/currentdate.py
+++ b/currentdate.py
@@ -3,21 +3,26 @@
 Created on Thu Oct 24 22:42:50 2019
 
 @author: prabha
+
+Modified on Wed Feb 21 2024
+@author: Shahid
+
 """
 
 import pyspark
 from pyspark.sql import SparkSession
-from pyspark.sql.functions import col
-from pyspark.sql.functions import to_timestamp, current_timestamp
-from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType
+from pyspark.sql.types import DateType
+from pyspark.sql.functions import to_date
 
 spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()
 
-schema = StructType([
-    StructField("seq", StringType(), True)])
+# Sample data with a string column representing dates
+data = [("1/12/2023",), ("2/15/2023",), ("3/20/2023",)]
+columns = ["date_str"]
 
-dates = ['1']
+df = spark.createDataFrame(data, columns)
 
-df = spark.createDataFrame(list('1'), schema=schema)
+# Convert the string column to a DateType
+df = df.withColumn("date", to_date("date_str", "M/d/yyyy").cast(DateType()))
 
-df.show()
\ No newline at end of file
+df.show()