forked from spark-examples/pyspark-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pandas-pyspark-dataframe.py
47 lines (33 loc) · 1.21 KB
/
pandas-pyspark-dataframe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding: utf-8 -*-
"""
author SparkByExamples.com
"""
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# Build a small pandas DataFrame to use as the conversion source.
data = [['Scott', 50], ['Jeff', 45], ['Thomas', 54], ['Ann', 34]]
pandasDF = pd.DataFrame(data, columns=['Name', 'Age'])
print(pandasDF)

# Local Spark session with a single worker thread.
spark = (
    SparkSession.builder
    .master("local[1]")
    .appName("SparkByExamples.com")
    .getOrCreate()
)

# pandas -> Spark, letting Spark derive the schema from the pandas DataFrame.
# (Passing pandasDF.astype(str) instead would make every column a string.)
sparkDF = spark.createDataFrame(pandasDF)
sparkDF.printSchema()
sparkDF.show()

# pandas -> Spark with an explicit schema. The schema supplies the column
# names, so the pandas "Name" column is exposed as "First Name" here.
mySchema = StructType([
    StructField("First Name", StringType(), True),
    StructField("Age", IntegerType(), True),
])
sparkDF2 = spark.createDataFrame(pandasDF, schema=mySchema)
sparkDF2.printSchema()
sparkDF2.show()

# Enable Arrow for the Spark -> pandas conversion below, and allow Spark to
# fall back to the non-Arrow path if Arrow cannot be used.
# NOTE(review): "spark.sql.execution.arrow.enabled" is the pre-3.0 key; on
# Spark 3.x the preferred key is "spark.sql.execution.arrow.pyspark.enabled".
# Confirm the target Spark version before switching.
spark.conf.set("spark.sql.execution.arrow.enabled", "true")
spark.conf.set("spark.sql.execution.arrow.pyspark.fallback.enabled", "true")

# Spark -> pandas. toPandas must be *called*; without the parentheses the
# variable holds the bound method and print() shows its repr, not the data.
pandasDF2 = sparkDF2.toPandas()
print(pandasDF2)

# Read the settings back to confirm they were applied to the session.
test = spark.conf.get("spark.sql.execution.arrow.enabled")
print(test)
test123 = spark.conf.get("spark.sql.execution.arrow.pyspark.fallback.enabled")
print(test123)