-
Notifications
You must be signed in to change notification settings - Fork 3
/
arch.dot
30 lines (22 loc) · 1.45 KB
/
arch.dot
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Data-flow diagram: nodes are datasets and processing steps, and edge labels
// carry the command (or a short note) for the step that produces the target.
digraph ethane {
    // Every node, including those declared implicitly by edges, is drawn as a
    // record. In record labels the characters | { } < > are syntax, so literal
    // occurrences must be backslash-escaped.
    node [shape=record];

    // Dataset nodes. Fields are separated by '|': a name, a sample of the
    // content, and (for most nodes) a path pattern.
    PIG_ANALYTICS [label="PIG_ANALYTICS|Unstructured-unsupported-pigscripts| pig_ad_hoc(0-n)"];
    CUSTOMER_PAGE [label="CUSTOMER_PAGE|json|CUSTOMER_PAGE/part*"];
    DIRTY_CSV [label="DIRTY_CSV|fname lname -prod , price ,prod,..|generated/part*"];
    CSV [label="CSV|fname,lname,prod,price,date,xcoord,ycoord,...|cleaned/part*"];
    // The angle brackets around hive_warehouse are escaped: unescaped, a record
    // label treats <hive_warehouse> as a port name and omits it from the
    // rendered text.
    MAHOUT_VIEW_INPUT [label="MAHOUT_VIEW | (hashed name) 10001, (hashed purchases) 203 | \<hive_warehouse\>/mahout_cf_in/part*" ];
    MAHOUT_CF [label="MAHOUT_CF | (hashed name) 10001, (hashed product) 201, .6 | mahout_cf_out/part*" ];

    // Generation and Pig cleaning stage.
    Generate -> DIRTY_CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.generator.BPSGenerator 100 bps/generated/"] ;
    DIRTY_CSV -> pig [label=""];
    pig -> CSV [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.PigCSVCleaner bps/generated/ bps/cleaned/"];
    pig -> PIG_ANALYTICS [label="same as CSV job, but add your scripts to end... p1.pig p2.pig ..."];
    PIG_ANALYTICS -> CSV;

    // Hive stage: the cleaned CSV plus the two hash inputs feed the view creator.
    // NOTE(review): the HiveViewCreator label reads from bps/pig_out while the
    // Pig step above writes bps/cleaned/ -- confirm which path is intended.
    PROD_HASH -> hive [label="hive hash udf"];
    USER_HASH -> hive [label="hive hash udf"];
    CSV -> hive ;
    hive -> MAHOUT_VIEW_INPUT [label="hadoop jar bigpetstore.jar org.bigtop.bigpetstore.etl.HiveViewCreator bps/pig_out mahout_cf_in"];

    // Mahout stage. The node name below (including its spelling) is kept as-is
    // because it is also the text rendered in the node.
    MAHOUT_VIEW_INPUT -> mahout_collab_filter_recomender -> MAHOUT_CF;

    // Crunch stage: combines the Mahout output with the cleaned CSV.
    MAHOUT_CF -> crunch ;
    CSV -> crunch ;
    crunch -> CUSTOMER_PAGE [label="high performance joining"];
}