# -*- coding: utf-8 -*-
"""
author SparkByExamples.com
"""
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, unix_timestamp, from_unixtime

# Create SparkSession
spark = SparkSession.builder \
    .appName('SparkByExamples.com') \
    .getOrCreate()
# Input data: three string columns holding the same date/time in different formats
inputData = [("2019-07-01 12:01:19",
              "07-01-2019 12:01:19",
              "07-01-2019")]
columns = ["timestamp_1", "timestamp_2", "timestamp_3"]
df = spark.createDataFrame(data=inputData, schema=columns)
df.printSchema()
df.show(truncate=False)
# unix_timestamp() converts a date/time string to Unix epoch seconds (LongType).
# With no format argument it assumes 'yyyy-MM-dd HH:mm:ss';
# with no arguments at all it returns the current Unix timestamp.
df2 = df.select(
    unix_timestamp(col("timestamp_1")).alias("timestamp_1"),
    unix_timestamp(col("timestamp_2"), "MM-dd-yyyy HH:mm:ss").alias("timestamp_2"),
    unix_timestamp(col("timestamp_3"), "MM-dd-yyyy").alias("timestamp_3"),
    unix_timestamp().alias("timestamp_4")
)
df2.printSchema()
df2.show(truncate=False)
# from_unixtime() converts Unix epoch seconds back to a formatted timestamp string,
# defaulting to 'yyyy-MM-dd HH:mm:ss' when no format is supplied.
df3 = df2.select(
    from_unixtime(col("timestamp_1")).alias("timestamp_1"),
    from_unixtime(col("timestamp_2"), "MM-dd-yyyy HH:mm:ss").alias("timestamp_2"),
    from_unixtime(col("timestamp_3"), "MM-dd-yyyy").alias("timestamp_3"),
    from_unixtime(col("timestamp_4")).alias("timestamp_4")
)
df3.printSchema()
df3.show(truncate=False)
#SQL
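# The original file ends at the "#SQL" marker above, so the Spark SQL portion is
# not present here. A minimal sketch of the same conversions via spark.sql
# (an assumption based on the DataFrame code above, not the original source):
spark.sql(
    "select unix_timestamp('2019-07-01 12:01:19') as timestamp_1, "
    "unix_timestamp('07-01-2019 12:01:19', 'MM-dd-yyyy HH:mm:ss') as timestamp_2, "
    "unix_timestamp('07-01-2019', 'MM-dd-yyyy') as timestamp_3, "
    "unix_timestamp() as timestamp_4"
).show(truncate=False)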