H3 UDF's for Apache Hive
- EdgeLength
- GeoToH3
- GeoToH3Address
- GetH3UnidirectionalEdge
- GetH3UnidirectionalEdgesFromHexagon
- H3Distance
- H3IndexesAreNeighbors
- H3SetToMultiPolygon
- H3ToCenterChild
- H3ToCenterChildWkt
- H3ToChildren
- H3ToChildrenWkt
- H3ToGeoBoundaryWkt
- H3ToGeoWkt
- H3ToParent
- H3ToParentWkt
- H3ToString
- HexArea
- KRing
- KRingDistances
- KRingToWkt
- LatLongH3ToGeoBoundaryWkt
- NumHexagons
- PolyfillToArrayH3Index
- PolyfillToArrayWkt
For all of the functions listed below you execute describe.
DESCRIBE FUNCTION <function_name>;
DESCRIBE FUNCTION EXTENDED <function_name>;
end
NOTE: Temporary functions are used for these examples. Temporary functions will not work in LLAP. You must create permanent functions in LLAP.
CREATE TEMPORARY FUNCTION EdgeLength AS 'com.dot.h3.hive.udf.EdgeLength';
SELECT _FUNC_(12, 'm') AS edge_meters;
+--------------+
| edge_meters |
+--------------+
| 9.415526211 |
+--------------+
CREATE TEMPORARY FUNCTION GeoToH3 AS 'com.dot.h3.hive.udf.GeoToH3';
SELECT GeoToH3(CAST(40.86016 AS DOUBLE), CAST(-73.90071 AS DOUBLE), 12) AS index;
+---------------------+
| index |
+---------------------+
| 631243922056054783 |
+---------------------+
CREATE TEMPORARY FUNCTION GeoToH3Address AS 'com.dot.h3.hive.udf.GeoToH3Address';
SELECT GeoToH3Address(CAST(40.86016 AS DOUBLE), CAST(-73.90071 AS DOUBLE), 12) AS index;
+------------------+
| index |
+------------------+
| 892a100acc7ffff |
+------------------+
NOTE: The indexes must be neighbors for this to work.
BAD EXAMPLE
CREATE TEMPORARY FUNCTION GetH3UnidirectionalEdge AS 'com.dot.h3.hive.udf.GetH3UnidirectionalEdge';
SELECT GetH3UnidirectionalEdge(61773312317403955,631243922056054783) AS index;
Error: Error while compiling statement: FAILED: IllegalArgumentException Given indexes are not neighbors. (state=42000,code=40000)
First Example: neighbors that work
CREATE TEMPORARY FUNCTION GetH3UnidirectionalEdge AS 'com.dot.h3.hive.udf.GetH3UnidirectionalEdge';
SELECT GetH3UnidirectionalEdge(617733122422996991,617733122423259135) AS edge;
+----------------------+
| edge |
+----------------------+
| 1266251468764348415 |
+----------------------+
Second Example: neighbors that work from string:
CREATE TEMPORARY FUNCTION GetH3UnidirectionalEdge AS 'com.dot.h3.hive.udf.GetH3UnidirectionalEdge';
SELECT GetH3UnidirectionalEdge('892a1008003ffff','892a1008007ffff') AS edge;"
+-------------------+
| edge |
+-------------------+
| 1192a1008003ffff |
+-------------------+
First Example:
CREATE TEMPORARY FUNCTION gh3udefh AS 'com.dot.h3.hive.udf.GetH3UnidirectionalEdgesFromHexagon';
SELECT gh3udefh(599718724986994687) AS list;
+----------------------------------------------------+
| list |
+----------------------------------------------------+
| [1248237071328346111,1320294665366274047, etc. |
+----------------------------------------------------+
Second Example:
CREATE TEMPORARY FUNCTION gh3udefh AS 'com.dot.h3.hive.udf.GetH3UnidirectionalEdgesFromHexagon';
SELECT gh3udefh('852a100bfffffff') AS list;"
+----------------------------------------------------+
| list |
+----------------------------------------------------+
| [\"1152a100bfffffff\",\"1252a100bfffffff\", etc. |
+----------------------------------------------------+
NOTE: The indexes should be same size? I had the error as noted below.
CREATE TEMPORARY FUNCTION H3Distance AS 'com.dot.h3.hive.udf.H3Distance';
SELECT H3Distance(61773312317403955,631243922056054783) AS dist;
Error: Error while compiling statement: FAILED: SemanticException [Error 10014]: Line 1:7 Wrong arguments '631243922056054783': org.apache.hadoop.hive.ql.metadata.HiveException:com.uber.h3core.exceptions.DistanceUndefinedException: Distance not defined between the two indexes. (state=42000,code=10014)
Example: neighbors that work
CREATE TEMPORARY FUNCTION H3Distance AS 'com.dot.h3.hive.udf.H3Distance';
SELECT H3Distance(617733122422996991,617733122423259135) AS dist;
+-------+
| dist |
+-------+
| 1 |
+-------+
BAD Example
CREATE TEMPORARY FUNCTION H3IndexesAreNeighbors AS 'com.dot.h3.hive.udf.H3IndexesAreNeighbors';
SELECT H3IndexesAreNeighbors('db768011473333','892a100acc7ffff') AS neighbors;
+------------+
| neighbors |
+------------+
| false |
+------------+
Example neighbors that work
CREATE TEMPORARY FUNCTION H3IndexesAreNeighbors AS 'com.dot.h3.hive.udf.H3IndexesAreNeighbors';
SELECT H3IndexesAreNeighbors(617733122422996991,617733122423259135) AS neighbors;
+------------+
| neighbors |
+------------+
| true |
+------------+
Example 1
CREATE TEMPORARY FUNCTION H3SetToMultiPolygon AS 'com.dot.h3.hive.udf.H3SetToMultiPolygon';
SELECT H3SetToMultiPolygon(array(617733122422996991,617733122423259135)) AS wkt_array;
+----------------------------------------------------+
| wkt_array |
+----------------------------------------------------+
| [\"POLYGON((-73.99184674763184 40.851169994189114,-73.99177736650066 40.84940709139823,-73.98960021718895 40.84858352856829,-73.98749236284553 40.849522800877686,-73.98756158290558 40.85128570584109,-73.98973881838064 40.852109336325405,-73.98980812679454 40.85387229829392,-73.99198552554876 40.85469591594949,-73.99409352970484 40.85375650398033,-73.99402406020228 40.85199354419126,-73.99184674763184 40.851169994189114))\",\"POLYGON((-73.99184674763184 40.851169994189114,-73.99177736650066 40.84940709139823,-73.98960021718895 40.84858352856829,-73.98749236284553 40.849522800877686,-73.98756158290558 40.85128570584109,-73.98973881838064 40.852109336325405,-73.98980812679454 40.85387229829392,-73.99198552554876 40.85469591594949,-73.99409352970484 40.85375650398033,-73.99402406020228 40.85199354419126,-73.99184674763184 40.851169994189114))\"] |
+----------------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION H3SetToMultiPolygon AS 'com.dot.h3.hive.udf.H3SetToMultiPolygon';
SELECT H3SetToMultiPolygon(array('892a1008003ffff','892a1008007ffff')) AS wkt_array;"
+----------------------------------------------------+
| wkt_array |
+----------------------------------------------------+
| same as the above |
+----------------------------------------------------+)
Example 1
CREATE TEMPORARY FUNCTION H3ToCenterChild AS 'com.dot.h3.hive.udf.H3ToCenterChild'
SELECT H3ToCenterChild(61773312317403955, 13) AS center_child
+--------------------+
| center_child |
+--------------------+
| 61773312317403955 |
+--------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToCenterChild AS 'com.dot.h3.hive.udf.H3ToCenterChild'
SELECT H3ToCenterChild('db768011473333', 13) AS center_child;
+-----------------+
| center_child |
+-----------------+
| db768011473333 |
+-----------------+
Example 1
CREATE TEMPORARY FUNCTION H3ToCenterChildWkt AS 'com.dot.h3.hive.udf.H3ToCenterChildWkt'
SELECT H3ToCenterChildWkt(61773312317403955, 13) AS center_child_wkt;
+------------------------------------------------+
| center_child_wkt |
+------------------------------------------------+
| POINT(-105.89054624819013 -30.32377110841559) |
+------------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToCenterChildWkt AS 'com.dot.h3.hive.udf.H3ToCenterChildWkt';
SELECT H3ToCenterChildWkt('db768011473333', 13) AS center_child_wkt;"
+------------------------------------------------+
| center_child_wkt |
+------------------------------------------------+
| POINT(-105.89054624819013 -30.32377110841559) |
+------------------------------------------------+
NOTE: This returns the center point of the index
Example 1
CREATE TEMPORARY FUNCTION H3ToChildren AS 'com.dot.h3.hive.udf.H3ToChildren';
SELECT H3ToChildren(599718724986994687, 9) AS children;
+----------------------------------------------------+
| children |
+----------------------------------------------------+
| [617733122422996991,617733122423259135,617733122423521279, etc..
+----------------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToChildren AS 'com.dot.h3.hive.udf.H3ToChildren';
SELECT H3ToChildren('852a100bfffffff', 9) AS children;"
+----------------------------------------------------+
| children |
+----------------------------------------------------+
| [\"892a1008003ffff\",\"892a1008007ffff\", etc... |
+----------------------------------------------------+
Example 1
CREATE TEMPORARY FUNCTION H3ToChildrenWkt AS 'com.dot.h3.hive.udf.H3ToChildrenWkt';
SELECT H3ToChildrenWkt(599718724986994687, 9) AS children;
+----------------------------------------------------+
| children |
+----------------------------------------------------+
| [\"POINT(-73.99191613398102 40.85293293570688)\",\"POINT(-73.98966951517899 40.85034641308286)\",
+----------------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToChildrenWkt AS 'com.dot.h3.hive.udf.H3ToChildrenWkt';
SELECT H3ToChildrenWkt('852a100bfffffff', 9) AS children;"
+----------------------------------------------------+
| children |
+----------------------------------------------------+
| [\"POINT(-73.99191613398102 40.85293293570688)\",\"POINT(-73.98966951517899 40.85034641308286)\",
+----------------------------------------------------+
Example 1
CREATE TEMPORARY FUNCTION H3ToGeoBoundaryWkt AS 'com.dot.h3.hive.udf.H3ToGeoBoundaryWkt';
SELECT H3ToGeoBoundaryWkt(61773312317403955) AS wkt;
+----------------------------------------------------+
| wkt |
+----------------------------------------------------+
| POLYGON((-105.89053610304362 -30.323807809188516, |
+----------------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToGeoBoundaryWkt AS 'com.dot.h3.hive.udf.H3ToGeoBoundaryWkt';
SELECT H3ToGeoBoundaryWkt('892a100acc7ffff') AS wkt;"
+----------------------------------------------------+
| wkt |
+----------------------------------------------------+
| POLYGON((-105.89053610304362 -30.323807809188516, |
+----------------------------------------------------+
Example 1
CREATE TEMPORARY FUNCTION H3ToGeoWkt AS 'com.dot.h3.hive.udf.H3ToGeoWkt';
SELECT H3ToGeoWkt(61773312317403955) AS wkt;
+------------------------------------------------+
| wkt |
+------------------------------------------------+
| POINT(-105.89054624819013 -30.32377110841559) |
+------------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToGeoWkt AS 'com.dot.h3.hive.udf.H3ToGeoWkt';
SELECT H3ToGeoWkt('892a100acc7ffff') AS wkt;"
+------------------------------------------------+
| wkt |
+------------------------------------------------+
| POINT(-105.89054624819013 -30.32377110841559) |
+------------------------------------------------+
Example 1
CREATE TEMPORARY FUNCTION H3ToParent AS 'com.dot.h3.hive.udf.H3ToParent';
SELECT H3ToParent(617733123174039551, 5) AS parent;
+---------------------+
| parent |
+---------------------+
| 599718724986994687 |
+---------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToParent AS 'com.dot.h3.hive.udf.H3ToParent';
SELECT H3ToParent('892a100acc7ffff', 5) AS parent;"
+------------------+
| parent |
+------------------+
| 852a100bfffffff |
+------------------+
Example 1
CREATE TEMPORARY FUNCTION H3ToParentWkt AS 'com.dot.h3.hive.udf.H3ToParentWkt';
SELECT H3ToParentWkt(617733123174039551, 9) AS parent;
+----------------------------------------------+
| parent |
+----------------------------------------------+
| POINT(-73.90212095615803 40.86061876224212) |
+----------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION H3ToParentWkt AS 'com.dot.h3.hive.udf.H3ToParentWkt';
SELECT H3ToParentWkt('892a100acc7ffff', 9) AS parent;"
+----------------------------------------------+
| parent |
+----------------------------------------------+
| POINT(-73.90212095615803 40.86061876224212) |
+----------------------------------------------+
Example
CREATE TEMPORARY FUNCTION H3ToString AS 'com.dot.h3.hive.udf.H3ToString';
SELECT H3ToString(631243922056054783);
8c2a100acc687ff
Example 1
CREATE TEMPORARY FUNCTION HexArea as 'com.dot.h3.hive.udf.HexArea';
SELECT HexArea(9, 'km2') AS hex_area;
+------------+
| hex_area |
+------------+
| 0.1053325 |
+------------+
Example 2
CREATE TEMPORARY FUNCTION HexArea as 'com.dot.h3.hive.udf.HexArea';
SELECT HexArea(9, 'm2') AS hex_area;
+-----------+
| hex_area |
+-----------+
| 105332.5 |
+-----------+
Example 1
CREATE TEMPORARY FUNCTION KRing AS 'com.dot.h3.hive.udf.KRing';
SELECT KRing(617733123174039551, 9) AS kring;
+----------------------------------------------+
| kring |
+----------------------------------------------+
| [617733123174039551,617733123173777407, etc. |
+----------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION KRing AS 'com.dot.h3.hive.udf.KRing';
SELECT KRing('892a100acc7ffff', 9) AS kring;"
+----------------------------------------------+
| kring |
+----------------------------------------------+
| [\"892a100acc7ffff\",\"892a100acc3ffff\",etc.|
+----------------------------------------------+
Example 1
CREATE TEMPORARY FUNCTION KRingDistances AS 'com.dot.h3.hive.udf.KRingDistances';
SELECT KRingDistances(631243922056054783, 9) AS wkt;
+----------------------------------------------------+
| wkt |
+----------------------------------------------------+
| MULTIPOLYGON(((-73.90074702414034 40.86016857340853))
+----------------------------------------------------+
Example 2
CREATE TEMPORARY FUNCTION KRingDistances AS 'com.dot.h3.hive.udf.KRingDistances';
SELECT KRingDistances('8c2a100acc687ff', 9) AS wkt;"
+----------------------------------------------------+
| wkt |
+----------------------------------------------------+
| MULTIPOLYGON(((-73.90074702414034 40.86016857340853))
+----------------------------------------------------+
Example 1
CREATE TEMPORARY FUNCTION KRingToWkt AS 'com.dot.h3.hive.udf.KRingToWkt';
SELECT KRingToWkt(617733123174039551, 9) AS KRingToWkt;"
+----------------------------------------------------+"
| kringtowkt |"
+----------------------------------------------------+"
| [\"POINT(-73.90212095615803 40.86061876224212)\", |"
+----------------------------------------------------+"
Example 2
CREATE TEMPORARY FUNCTION KRingToWkt AS 'com.dot.h3.hive.udf.KRingToWkt';
SELECT KRingToWkt('892a100acc7ffff', 9) AS KRingToWkt;"
+----------------------------------------------------+"
| kringtowkt |"
+----------------------------------------------------+"
| [\"POINT(-73.90212095615803 40.86061876224212)\", |"
+----------------------------------------------------+"
Example
SELECT _FUNC_(40.86016, -73.90071, 12);
POLYGON((-73.90218697935661 40.862381901482266,-73.9042969767565 40.86144407471913,-73.90423087546569 40.85968095579108,-73.90205493792557 40.858855661723865,-73.89994501590182 40.85979341878112,-73.90001095604163 40.86155653960862))
--The resolution can be between 0 and 15, 15 is the most granular
Example
CREATE TEMPORARY FUNCTION NumHexagons as 'com.dot.h3.hive.udf.NumHexagons';
SELECT NumHexagons(9) AS num_hexagons;
+---------------+
| num_hexagons |
+---------------+
| 4842432842 |
+---------------+
Example
CREATE TEMPORARY FUNCTION PolyfillToArrayH3Index AS 'com.dot.h3.hive.udf.PolyfillToArrayH3Index';
SELECT _FUNC_('POLYGON((-71.23094863399959 42.35171702149799,-71.20507841890782 42.39384377360396,-71.18534241583312 42.40583588152941,-71.13489748711537 42.40374196572458,-71.12786523200806 42.3537116038451,-71.23094863399959 42.35171702149799))', null, 9) AS WKT;
--Returns Array<String>
--Can take either NULL, MULTIPOLYGON or POLYGON WKT for the holes_poly_multipoly argument.
--The resolution can be between 0 and 15, 15 is the most granular
Example
CREATE TEMPORARY FUNCTION PolyfillToArrayWkt AS 'com.dot.h3.hive.udf.PolyfillToArrayWkt';
SELECT PolyfillToArrayWkt('POLYGON((-71.23094863399959 42.35171702149799,-71.20507841890782 42.39384377360396,-71.18534241583312 42.40583588152941,-71.13489748711537 42.40374196572458,-71.12786523200806 42.3537116038451,-71.23094863399959 42.35171702149799))', null, 9) AS WKT;
--Returns Array<String>
--Can take either NULL, MULTIPOLYGON or POLYGON WKT for the holes_poly_multipoly argument.
--The resolution can be between 0 and 15, 15 is the most granular
- ESRI is also used and must be installed, permanent functions have already been created for ESRI.
- You have loaded a table with breadcrumb data.
- You have loaded a table with the New York City tlc zones, similar to the following (https://catalog.data.gov/dataset/nyc-taxi-zones)
NOTE: This example uses sudo code as an example. The real tables and columns have been changed from our system. NOTE: The distinct in the query is to limit the index's on the flattening of the array where we have overlap between zones.
This has proven to significantly increase the performance of the query. If you are querying on a single zone the distinct would not be required.
This is however an example of a working query, the table has been partitioned in Hive by a column called ym for the current year and month.
USE sample_data;
SET tez.queue.name=big;
CREATE TEMPORARY FUNCTION geotoh3 as 'com.dot.h3.hive.udf.GeoToH3';
CREATE TEMPORARY FUNCTION polyfilltoarrayh3index as 'com.dot.h3.hive.udf.PolyfillToArrayH3Index';
CREATE TEMPORARY FUNCTION H3ToGeoBoundaryWkt AS 'com.dot.h3.hive.udf.H3ToGeoBoundaryWkt';
SET hivevar:RESOLUTION=10;
with geomtab AS (
SELECT
st_astext(ST_GeomFromText(geometry)) as geometry
FROM tlc_zones
WHERE
geometry rlike 'MULTI.*' = false
),
geom_array AS (
SELECT
polyfilltoarrayh3index(geomtab.geometry, NULL, 10) AS `hexid`
FROM
geomtab
),
bread AS (
SELECT
bc_id
, bc_timestamp
, cast(geotoh3(vehicle_lat,vehicle_long, 10) as BIGINT ) AS `hexid`
FROM city_breadcrumbs
WHERE
ym = 201911
),
flattened_geom_array AS (
SELECT
DISTINCT CAST(poly_hexid AS BIGINT) AS poly_hexid
FROM geom_array lateral view explode(hexid) geom_array as `poly_hexid`
),
preout AS (
SELECT
bread.*
, flattened_geom_array.poly_hexid
, H3ToGeoBoundaryWkt(cast(flattened_geom_array.poly_hexid as bigint)) AS `wkt`
FROM
flattened_geom_array, bread
WHERE
bread.hexid = flattened_geom_array.poly_hexid
)
SELECT
count(*)
, poly_hexid
, wkt
FROM preout
GROUP BY
poly_hexid, wkt;
Next we take a look at a single zone and execute at a higher resolution.
USE my_db;
SET tez.queue.name=big;
CREATE TEMPORARY FUNCTION geotoh3 as 'com.dot.h3.hive.udf.GeoToH3';
CREATE TEMPORARY FUNCTION polyfilltoarrayh3index as 'com.dot.h3.hive.udf.PolyfillToArrayH3Index';
CREATE TEMPORARY FUNCTION H3ToGeoBoundaryWkt AS 'com.dot.h3.hive.udf.H3ToGeoBoundaryWkt';
set hivevar:RESOLUTION=15;
set hivevar:LOCATIONID=142;
with geomtab AS (
SELECT
st_astext(ST_GeomFromText(geometry)) as geometry
FROM tlc_zones
WHERE
geometry rlike 'MULTI.*' = false
AND locationid = ${LOCATIONID}
),
geom_array AS (
SELECT
polyfilltoarrayh3index(geomtab.geometry, NULL, ${RESOLUTION}) AS `hexid`
FROM
geomtab
),
bread AS (
SELECT
bc_id
,bc_timestamp
,cast(geotoh3(vehicle_lat,vehicle_long, ${RESOLUTION}) as BIGINT ) AS `hexid`
FROM city_breadcrumbs
WHERE
ym = 201911
),
flattened_geom_array AS (
SELECT
DISTINCT CAST(poly_hexid AS BIGINT) AS poly_hexid
FROM geom_array lateral view explode(hexid) geom_array as `poly_hexid`
),
preout AS (
SELECT
bread.*
, flattened_geom_array.poly_hexid
, H3ToGeoBoundaryWkt(cast(flattened_geom_array.poly_hexid as bigint)) AS `wkt`
FROM
flattened_geom_array, bread
WHERE
bread.hexid = flattened_geom_array.poly_hexid
)
SELECT
count(*) total
, poly_hexid
, wkt
FROM preout
GROUP BY
poly_hexid, wkt;
The output looks as follows in QGIS
Taking a closer look at the results you can see that PolyfillToArrayH3Index will not perfectly conform to the boundary and is expected. In some cases this is along a street.
Another function could possibly provide the ability to extend slightly beyond the boundary created with the PolyfillToArrayH3Index function.
###Kring
The Kring function takes an index + resolution and finds all the neighbors around it in a ring. To investigate further let's look at the index 635747521119807551 as seen below highlighted in green.
Method used to create the KRing round this index:
NOTE: resolution is 1
USE my_db;
SET tez.queue.name=big;
CREATE TEMPORARY FUNCTION KRing AS 'com.dot.h3.hive.udf.KRing';
with geom_array AS (
SELECT KRing(635747521119807551, 1) AS `hexid`
),
flattened_geom_array AS (
SELECT
DISTINCT CAST(poly_hexid AS BIGINT) AS poly_hexid
FROM geom_array lateral view explode(hexid) geom_array as `poly_hexid`
)
,
preout AS (
SELECT
flattened_geom_array.poly_hexid
,H3ToGeoBoundaryWkt(cast(flattened_geom_array.poly_hexid as bigint)) AS `wkt`
FROM
flattened_geom_array
)
SELECT * FROM preout;
Result in QGIS:
Next we will take this Kring functionality back to the query we used on the zone in order to pull in extra information.
NOTE: Again we are using a distinct here to limit the number of indexes as the Kring will create duplicates. Adding Kring into the query does add some additional overhead and time to execute.
USE my_db;
SET tez.queue.name=big;
CREATE TEMPORARY FUNCTION geotoh3 as 'com.dot.h3.hive.udf.GeoToH3';
CREATE TEMPORARY FUNCTION polyfilltoarrayh3index as 'com.dot.h3.hive.udf.PolyfillToArrayH3Index';
CREATE TEMPORARY FUNCTION H3ToGeoBoundaryWkt AS 'com.dot.h3.hive.udf.H3ToGeoBoundaryWkt';
CREATE TEMPORARY FUNCTION KRing AS 'com.dot.h3.hive.udf.KRing';
with geomtab AS (
SELECT
st_astext(ST_GeomFromText(geometry)) as geometry
FROM tlc_zones
WHERE
AND locationid = 142
),
geom_array AS (
SELECT
polyfilltoarrayh3index(geomtab.geometry, NULL, 13) AS `hexid`
FROM
geomtab
),
flattened_geom_array AS (
SELECT
DISTINCT CAST(poly_hexid AS BIGINT) AS poly_hexid
FROM geom_array lateral view explode(hexid) geom_array as `poly_hexid`
),
kring_arrays AS (
SELECT
KRing(poly_hexid, 1) AS `hexid`
FROM flattened_geom_array
),
flatten_kring AS(
SELECT
DISTINCT CAST(poly_hexid AS BIGINT) AS poly_hexid
FROM kring_arrays lateral view explode(hexid) kring_arrays as `poly_hexid`
),
bread AS (
SELECT
bc_id
,bc_timestamp
,cast(geotoh3(vehicle_lat,vehicle_long, 13) as BIGINT ) AS `hexid`
FROM city_breadcrumbs
WHERE
ym = 201911
),
preout AS (
SELECT
bread.*
, flatten_kring.poly_hexid
, H3ToGeoBoundaryWkt(cast(flatten_kring.poly_hexid as bigint)) AS `wkt`
FROM
flatten_kring, bread
WHERE
bread.hexid = flatten_kring.poly_hexid
)
SELECT
count(*) total
, poly_hexid
, wkt
FROM preout
GROUP BY
poly_hexid, wkt;
Comparing the results, we can see that we now have overlap with the boundary that we did not prior. It would be up to the Analyst to decide if they want to add this for their specific use case. With resolution set to 1 for Kring you will pull in all neighbors and not only would there be overlap but you would pull in data from the other zones. Related to your polygon resolution, the higher the resolution the more accurate with a hit to performance. However, it is likely that a single zone at resolution 15 is going to run pretty quickly, our example is at resolution 13.
We will continue to explore with the index 635747521119807551 which is derived from the resolution 13. Execution of the following query will give us the children at the finest resolution of 15.
CREATE TEMPORARY FUNCTION H3ToChildrenWkt AS 'com.dot.h3.hive.udf.H3ToChildrenWkt';
with children_array AS (
SELECT H3ToChildrenWkt( CAST(635747521119807551 AS BIGINT), 15) AS wkt_arr
),
flattened_children_array AS (
SELECT
DISTINCT point
FROM children_array lateral view explode(wkt_arr) children_array as `point`
)
SELECT point FROM flattened_children_array;
The output displays as the yellow dots in QGIS
Next we will plot out the children polygons inside the index 635747521119807551.
In order to do so, we are using several hive functions and h3 functions together to translate the data. Perhaps in the future I will build a function in Java that handles most of this work for us.
CREATE TEMPORARY FUNCTION GeoToH3 AS 'com.dot.h3.hive.udf.GeoToH3';
CREATE TEMPORARY FUNCTION H3ToChildrenWkt AS 'com.dot.h3.hive.udf.H3ToChildrenWkt';
CREATE TEMPORARY FUNCTION H3SetToMultiPolygon AS 'com.dot.h3.hive.udf.H3SetToMultiPolygon';
CREATE TEMPORARY FUNCTION H3ToGeoBoundaryWkt AS 'com.dot.h3.hive.udf.H3ToGeoBoundaryWkt';
with children_array AS (
SELECT H3ToChildrenWkt( CAST(635747521119807551 AS BIGINT), 15) AS wkt_arr
),
flattened_children_array AS (
SELECT
DISTINCT point
FROM children_array lateral view explode(wkt_arr) children_array as `point`
)
SELECT
H3ToGeoBoundaryWkt(
GeoToH3(
CAST( split(translate(translate(point, 'POINT(', '') , ')', ''), ' ')[1] AS DOUBLE ) --LAT
,CAST( split(translate(translate(point, 'POINT(', '') , ')', ''), ' ')[0] AS DOUBLE ) --LNG
,15
)
) AS poly
FROM flattened_children_array;
#Building
If you need to update your POM with a different version of HDP
https://repo.hortonworks.com/content/groups/public/org/apache/hive/hive-exec/ https://repo.hortonworks.com/content/groups/public/org/apache/hadoop/hadoop-client/
For the next example we used a query performed by one of the analysts using ESRI and converted the query to use Uber H3 in place of the original boundary.
NOTE: I forced the number of mappers to increase artificially, according to the following article it is also not the recommended method. However, the method's mentioned in the article were not yielding results and they have for me in the past on different queries. This is a very good article and it should be further investigated.
Original Analyst Query
CREATE TEMPORARY TABLE IF NOT exists all_rows AS
SELECT
d.*, (distance_meter/time_dif_sec)*2.23694 AS ind_speed --meter/second to mph
FROM
(
SELECT
c.*
, unix_timestamp(tpep_datetime)-unix_timestamp(lag_datetime) AS time_dif_sec
, ST_GeodesicLengthWGS84(ST_SetSRID(ST_Linestring(longitude,latitude, lag_longitude,lag_latitude), 4326)) AS distance_meter
FROM
(
SELECT
b.*
, lag(tpep_datetime) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_datetime
, lag(geometry) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_geometry
, lag(longitude) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_longitude
, lag(latitude) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_latitude
FROM
(
SELECT
a.*
, rank() over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) AS rank1
FROM
(
SELECT
bread.medallion
, bread.hack_number
, bread.type
, bread.vendor
, bread.trip_number
, bread.cruise
, bread.tpep_datetime
, bread.geometry
, bread.longitude
, bread.latitude
, year(bread.tpep_datetime) AS `YEAR`
, month(bread.tpep_datetime) AS `MONTH`
, day(bread.tpep_datetime) AS `DAY`
, hour(bread.tpep_datetime) AS `HOUR`
FROM bread,default.my_custom_shapefile
WHERE
st_contains(my_custom_shapefile.geom, bread.geometry)
AND bread.yearmonth = 201812
AND bread.cruise=0
) AS a
) AS b
) as c
WHERE
rank1>1
AND lag_datetime IS NOT NULL
AND lag_geometry IS NOT NULL
) AS d
WHERE
time_dif_sec IS NOT NULL
AND time_dif_sec>0
AND time_dif_sec<180;
SELECT COUNT(*) FROM (
SELECT all_rows.year, all_rows.month, all_rows.hour,cast((sum(all_rows.time_dif_sec)/3600) as double precision) AS vht,sum(all_rows.distance_meter)*0.000621371 AS vmt,
cast(((sum(all_rows.distance_meter)*0.000621371)/(sum(all_rows.time_dif_sec)/3600)) as double precision) AS MFD_speed, Avg(all_rows.ind_speed) AS mean_speed,
percentile_approx(all_rows.ind_speed, 0.5) AS median_speed, count(all_rows.medallion) AS sample_size
FROM all_rows
GROUP BY year, month,day, hour
ORDER BY year, month,day, hour
);
drop table all_rows;
Updated Analyst Query - Uber H3
USE transportation_data;
SET tez.queue.name=production;
SET mapred.reduce.tasks=120;
with geomtab AS (
SELECT
st_astext(ST_GeomFromText(geom)) as geometry
FROM default.my_custom_shapefile
),
geom_array AS (
SELECT
polyfilltoarrayh3index(geomtab.geometry, NULL, 13) AS `hexid`
FROM
geomtab
),
flattened_geom_array AS (
SELECT
DISTINCT CAST(poly_hexid AS BIGINT) AS poly_hexid
FROM geom_array lateral view explode(hexid) geom_array as `poly_hexid`
),
bread AS (
SELECT
bread.medallion
, hack_number
, type
, vendor
, trip_number
, cruise
, tpep_datetime
, geometry
, longitude
, latitude
, year(tpep_datetime) AS `YEAR`
, month(tpep_datetime) AS `MONTH`
, day(tpep_datetime) AS `DAY`
, hour(tpep_datetime) AS `HOUR`
, cast(geotoh3(latitude,longitude, 13) as BIGINT ) AS `hexid`
FROM
bread
WHERE
bread.yearmonth = 201812
AND bread.cruise=0
),
spatial_join AS (
SELECT
bread.*
, flattened_geom_array.poly_hexid
, H3ToGeoBoundaryWkt(cast(flattened_geom_array.poly_hexid as bigint)) AS `wkt`
FROM
flattened_geom_array, bread
WHERE
bread.hexid = flattened_geom_array.poly_hexid
),
all_rows AS (
SELECT
d.*, (distance_meter/time_dif_sec)*2.23694 AS ind_speed --meter/second to mph
FROM
(
SELECT
c.*
, unix_timestamp(tpep_datetime)-unix_timestamp(lag_datetime) AS time_dif_sec
, ST_GeodesicLengthWGS84(ST_SetSRID(ST_Linestring(longitude,latitude, lag_longitude,lag_latitude), 4326)) AS distance_meter
FROM
(
SELECT
b.*
, lag(tpep_datetime) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_datetime
, lag(geometry) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_geometry
, lag(longitude) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_longitude
, lag(latitude) over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) as lag_latitude
FROM
(
SELECT
a.*
, rank() over (partition by year, month, day, hour, medallion, hack_number order by tpep_datetime asc) AS rank1
FROM
(
SELECT * FROM spatial_join
) AS a
) AS b
) as c
WHERE
rank1>1
AND lag_datetime IS NOT NULL
AND lag_geometry IS NOT NULL
) AS d
WHERE
time_dif_sec IS NOT NULL
AND time_dif_sec>0
AND time_dif_sec<180
)
SELECT COUNT(*) FROM (
SELECT
`year`
, `month`
, `hour`
, `day`
,cast((sum(all_rows.time_dif_sec)/3600) as double precision) AS vht
,sum(all_rows.distance_meter)*0.000621371 AS vmt
,cast(((sum(all_rows.distance_meter)*0.000621371)/(sum(all_rows.time_dif_sec)/3600)) as double precision) AS MFD_speed
,Avg(all_rows.ind_speed) AS mean_speed
,percentile_approx(all_rows.ind_speed, 0.5) AS median_speed
, count(all_rows.medallion) AS sample_size
FROM all_rows
GROUP BY
year
, month
, day
, hour
) P
;