-
Notifications
You must be signed in to change notification settings - Fork 35
/
step3_setupairlinedemo.sh
executable file
·80 lines (71 loc) · 2.41 KB
/
step3_setupairlinedemo.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Locations of the demo user's home directory and of the checked-out demo project.
export HOME_DIR=/home/demo
export PROJECT_DIR="$HOME_DIR/hdp-datascience-demo"

# Print the name of the HDP version directory found under the given root.
# Arguments: $1 - directory to scan (optional, defaults to /usr/hdp)
# Outputs:   one directory name containing a "2" (e.g. 2.2.0.0-2041), or
#            nothing when the root is missing or has no such entry.
# When several entries match, the last one in glob order is kept so that the
# result is always a single path component and never a multi-line list.
detect_hdp_version() {
  local hdp_root=${1:-/usr/hdp}
  local entry
  local found=
  for entry in "$hdp_root"/*2*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$entry" ] || continue
    found=${entry##*/}
  done
  printf '%s' "$found"
}

# HDP_VER is used further down to build /usr/hdp/$HDP_VER/hadoop/bin/hdfs.
HDP_VER=$(detect_hdp_version)
export HDP_VER
# Create the HDFS directories the demo reads from.
# /user/demo is created and chown'ed while running as the hdfs user; the data
# directories below it are then created by whoever runs this script
# (presumably the demo user - confirm).
# -p makes every step a no-op when the directory already exists, so the script
# can be re-run after a partial failure without "File exists" errors.
sudo -u hdfs hadoop fs -mkdir -p /user/demo
sudo -u hdfs hadoop fs -chown demo:demo /user/demo
# -p also creates the intermediate /user/demo/airline directory.
hadoop fs -mkdir -p /user/demo/airline/delay /user/demo/airline/weather
# Download the 2007 and 2008 airline delay files, unpack them and upload the
# resulting CSVs to /user/demo/airline/delay in HDFS.
echo "Downloading delay data to HDFS...."
delay_dir="$PROJECT_DIR/demo/airline/delay"
# -p creates the airline/ parent as well and tolerates a re-run.
mkdir -p "$delay_dir"
# Stop here rather than download into whatever directory we happen to be in.
cd "$delay_dir" || { echo "Cannot change to $delay_dir" >&2; exit 1; }
for delay_year in 2007 2008; do
  if wget "http://stat-computing.org/dataexpo/2009/${delay_year}.csv.bz2"; then
    bzip2 -d "${delay_year}.csv.bz2"
  else
    echo "Failed to download delay data for ${delay_year}" >&2
  fi
done
# Delete the local copies (to save space) only once they are safely in HDFS;
# after a failed upload they are kept so the download is not lost.
if hadoop fs -put "$delay_dir"/*.csv /user/demo/airline/delay; then
  rm "$delay_dir"/*.csv
else
  echo "Upload of delay data failed; local files kept in $delay_dir" >&2
fi
# Download the 2007 and 2008 daily weather files, unpack them and upload the
# resulting CSVs to /user/demo/airline/weather in HDFS.
echo "Downloading weather data to HDFS...."
weather_dir="$PROJECT_DIR/demo/airline/weather"
# -p creates any missing parent directory and tolerates a re-run.
mkdir -p "$weather_dir"
# Stop here rather than download into whatever directory we happen to be in.
cd "$weather_dir" || { echo "Cannot change to $weather_dir" >&2; exit 1; }
for weather_year in 2007 2008; do
  if wget "ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/by_year/${weather_year}.csv.gz"; then
    gunzip -d "${weather_year}.csv.gz"
  else
    echo "Failed to download weather data for ${weather_year}" >&2
  fi
done
# Delete the local copies (to save space) only once they are safely in HDFS;
# after a failed upload they are kept so the download is not lost.
if hadoop fs -put "$weather_dir"/*.csv /user/demo/airline/weather; then
  rm "$weather_dir"/*.csv
else
  echo "Upload of weather data failed; local files kept in $weather_dir" >&2
fi
# Return to the demo directory; nothing below depends on it, so only warn.
cd "$PROJECT_DIR/demo" || echo "Cannot change to $PROJECT_DIR/demo" >&2
# Print the directory holding the notebooks that match the installed layout.
# Globals:   HDP_VER (read) - name of the version directory under the HDP root
# Arguments: $1 - HDP root directory (optional, defaults to /usr/hdp)
# Outputs:   the demo-HDP2.2 directory when <root>/$HDP_VER/hadoop/bin/hdfs
#            exists, the plain demo directory otherwise.
# The path is quoted so an empty or multi-word HDP_VER selects the fallback
# directory instead of making the test fail with "too many arguments".
demo_notebook_dir() {
  local hdp_root=${1:-/usr/hdp}
  if [ -e "$hdp_root/${HDP_VER:-}/hadoop/bin/hdfs" ]; then
    echo "/home/demo/hdp-datascience-demo/demo-HDP2.2"
  else
    echo "/home/demo/hdp-datascience-demo/demo"
  fi
}

# Print the closing instructions for the python, Scala/Spark and R/Scalding
# notebooks.
# Arguments: $1 - HDP root directory, passed through to demo_notebook_dir
#                 (optional)
# Outputs:   the instruction text on stdout.
print_demo_instructions() {
  local demo_dir
  # Resolve the directory once; all three sections use the same one.
  demo_dir=$(demo_notebook_dir "$@")
  cat <<EOF


The demo setup is complete
To run the python demo execute
source ~/.bashrc
cd $demo_dir
ipython notebook
Then navigate to http://sandbox.hortonworks.com:9999 and open airline_python.ipynb

To run the Scala/Spark demo execute
source ~/.bashrc
cd $demo_dir
ipython notebook --profile spark
Then navigate to http://sandbox.hortonworks.com:9998 and open airline_spark.ipynb

To run the R/Scalding demo execute
source ~/.bashrc
R CMD javareconf -e
cd $demo_dir
ipython notebook
Then navigate to http://sandbox.hortonworks.com:9999 and open airline_Scalding_R.ipynb
EOF
}

print_demo_instructions