Support Map and Arrays of Maps in BQ for StorageWrites for Beam Rows #22179
@@ -29,11 +29,13 @@
 import java.time.LocalDateTime;
 import java.time.LocalTime;
 import java.time.temporal.ChronoUnit;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.schemas.Schema;
@@ -202,7 +204,19 @@ private static TableFieldSchema fieldDescriptorFromBeamField(Field field) {
        builder = builder.setType(type);
        break;
      case MAP:
-       throw new RuntimeException("Map types not supported by BigQuery.");
+       @Nullable FieldType keyType = field.getType().getMapKeyType();
+       @Nullable FieldType valueType = field.getType().getMapValueType();
+       if (keyType == null || valueType == null) {
+         throw new RuntimeException("Unexpected null element type!");
+       }
+
+       builder =
+           builder
+               .setType(TableFieldSchema.Type.STRUCT)
+               .addFields(fieldDescriptorFromBeamField(Field.of("key", keyType)))
+               .addFields(fieldDescriptorFromBeamField(Field.of("value", valueType)))
+               .setMode(TableFieldSchema.Mode.REPEATED);
+       break;
      default:
        @Nullable
        TableFieldSchema.Type primitiveType = PRIMITIVE_TYPES.get(field.getType().getTypeName());
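
For context, the snippet below is a hypothetical, hand-built sketch (not code from this PR) of the schema shape that fieldDescriptorFromBeamField now produces for a Beam MAP field, for example MAP<STRING, INT64>: a REPEATED STRUCT with key and value subfields. The field name "my_map" and the REQUIRED subfield modes are assumptions made for illustration only.

import com.google.cloud.bigquery.storage.v1.TableFieldSchema;

public class MapFieldSchemaSketch {
  public static void main(String[] args) {
    // Hypothetical equivalent of the schema emitted for a Beam MAP<STRING, INT64> field:
    // a repeated STRUCT whose subfields hold each entry's key and value.
    TableFieldSchema mapAsRepeatedStruct =
        TableFieldSchema.newBuilder()
            .setName("my_map") // assumed field name
            .setType(TableFieldSchema.Type.STRUCT)
            .setMode(TableFieldSchema.Mode.REPEATED)
            .addFields(
                TableFieldSchema.newBuilder()
                    .setName("key")
                    .setType(TableFieldSchema.Type.STRING)
                    .setMode(TableFieldSchema.Mode.REQUIRED) // assumed mode
                    .build())
            .addFields(
                TableFieldSchema.newBuilder()
                    .setName("value")
                    .setType(TableFieldSchema.Type.INT64)
                    .setMode(TableFieldSchema.Mode.REQUIRED) // assumed mode
                    .build())
            .build();
    System.out.println(mapAsRepeatedStruct);
  }
}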
@@ -231,6 +245,8 @@ private static Object messageValueFromRowValue(
    if (value == null) {
      if (fieldDescriptor.isOptional()) {
        return null;
+     } else if (fieldDescriptor.isRepeated()) {
+       return Collections.emptyList();
      } else {
        throw new IllegalArgumentException(
            "Received null value for non-nullable field " + fieldDescriptor.getName());
@@ -250,9 +266,18 @@ private static Object toProtoValue(
        if (arrayElementType == null) {
          throw new RuntimeException("Unexpected null element type!");
        }
-       return list.stream()
-           .map(v -> toProtoValue(fieldDescriptor, arrayElementType, v))
-           .collect(Collectors.toList());
+       Boolean shouldFlatMap =
+           arrayElementType.getTypeName().isCollectionType()
+               || arrayElementType.getTypeName().isMapType();
+
+       Stream<Object> valueStream =
+           list.stream().map(v -> toProtoValue(fieldDescriptor, arrayElementType, v));
+
+       if (shouldFlatMap) {
+         valueStream = valueStream.flatMap(vs -> ((List) vs).stream());
+       }
+
+       return valueStream.collect(Collectors.toList());
      case ITERABLE:
        Iterable<Object> iterable = (Iterable<Object>) value;
        @Nullable FieldType iterableElementType = beamFieldType.getCollectionElementType();
@@ -263,12 +288,46 @@ private static Object toProtoValue(
            .map(v -> toProtoValue(fieldDescriptor, iterableElementType, v))
            .collect(Collectors.toList());
      case MAP:
-       throw new RuntimeException("Map types not supported by BigQuery.");
+       Map<Object, Object> map = (Map<Object, Object>) value;
+       @Nullable FieldType keyType = beamFieldType.getMapKeyType();
+       @Nullable FieldType valueType = beamFieldType.getMapValueType();
+       if (keyType == null || valueType == null) {
+         throw new RuntimeException("Unexpected null element type!");
+       }
+
+       return map.entrySet().stream()
+           .map(
+               (Map.Entry<Object, Object> entry) ->
+                   mapEntryToProtoValue(
+                       fieldDescriptor.getMessageType(), keyType, valueType, entry))
+           .collect(Collectors.toList());
      default:
        return scalarToProtoValue(beamFieldType, value);
    }
  }

+  static Object mapEntryToProtoValue(
+      Descriptor descriptor,
+      FieldType keyFieldType,
+      FieldType valueFieldType,
+      Map.Entry<Object, Object> entryValue) {
+    DynamicMessage.Builder builder = DynamicMessage.newBuilder(descriptor);
+    FieldDescriptor keyFieldDescriptor =
+        Preconditions.checkNotNull(descriptor.findFieldByName("key"));
+    @Nullable Object key = toProtoValue(keyFieldDescriptor, keyFieldType, entryValue.getKey());
+    if (key != null) {
+      builder.setField(keyFieldDescriptor, key);
+    }
+    FieldDescriptor valueFieldDescriptor =
+        Preconditions.checkNotNull(descriptor.findFieldByName("value"));
+    @Nullable
+    Object value = toProtoValue(valueFieldDescriptor, valueFieldType, entryValue.getValue());
+    if (value != null) {
+      builder.setField(valueFieldDescriptor, value);
+    }
+    return builder.build();
+  }

  @VisibleForTesting
  static Object scalarToProtoValue(FieldType beamFieldType, Object value) {
    if (beamFieldType.getTypeName() == TypeName.LOGICAL_TYPE) {

Review comment (on the null-key check in mapEntryToProtoValue): are null keys allowed?

Reply: Good question. AFAICT the backing Map in the Row object is a HashMap (with expected size), so I would say that null keys are allowed in a map property for a Row object.
Review comment: explain why flatMap is correct here?

Reply: Because BQ does not support arrays of maps, this code makes the decision to flatten those structures (the if condition is computed based on that particular scenario). For example:
[
  map1 {k1: v1, k2: v2},
  map2 {k3: v3},
  map3 {k4: v4, k5: v5}
]

becomes

[
  record {key: k1, value: v1},
  record {key: k2, value: v2},
  record {key: k3, value: v3},
  record {key: k4, value: v4},
  record {key: k5, value: v5}
]
It respects the order in the array and the inherent order of iteration in the maps, but it won't check for repeated keys across the maps in the original array.
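
To make that flattening concrete, here is a small, self-contained sketch in plain Java (not code from this PR; the "record {...}" strings are just stand-ins for the key/value struct messages) that mirrors the map-then-flatMap step described above:

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class FlattenArrayOfMapsSketch {
  public static void main(String[] args) {
    // The array of maps from the example above, with insertion order preserved.
    Map<String, String> map1 = new LinkedHashMap<>();
    map1.put("k1", "v1");
    map1.put("k2", "v2");
    Map<String, String> map2 = new LinkedHashMap<>();
    map2.put("k3", "v3");
    Map<String, String> map3 = new LinkedHashMap<>();
    map3.put("k4", "v4");
    map3.put("k5", "v5");
    List<Map<String, String>> arrayOfMaps = Arrays.asList(map1, map2, map3);

    // Each map becomes a list of per-entry records (stand-ins for the key/value
    // STRUCT messages), and the per-map lists are then flattened into a single
    // repeated list, mirroring the flatMap decision above.
    List<String> flattened =
        arrayOfMaps.stream()
            .map(
                m ->
                    m.entrySet().stream()
                        .map(e -> "record {key: " + e.getKey() + ", value: " + e.getValue() + "}")
                        .collect(Collectors.toList()))
            .flatMap(List::stream)
            .collect(Collectors.toList());

    // Prints five records, in array order and per-map iteration order; duplicate
    // keys across maps are not deduplicated.
    flattened.forEach(System.out::println);
  }
}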