Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Avro Schema input processor #1753

Merged
merged 20 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
394 changes: 394 additions & 0 deletions src/helpers/AvroToMetaModel.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,394 @@
import {
AnyModel,
ArrayModel,
AvroSchema,
BooleanModel,
EnumModel,
EnumValueModel,
FloatModel,
IntegerModel,
MetaModel,
MetaModelOptions,
ObjectModel,
ObjectPropertyModel,
StringModel,
UnionModel
} from '../models';
import { Logger } from '../utils';

/**
 * Derives the MetaModel options for an Avro schema.
 *
 * A schema is treated as nullable only when its `type` is a union (an array)
 * that lists `'null'` as one of its branches. Any other `type` — a single
 * type name, a union without `'null'`, or a missing `type` — is not nullable.
 *
 * @param AvroModel the Avro schema to inspect
 * @returns options with `isNullable` always set to an explicit boolean
 */
function getMetaModelOptions(AvroModel: AvroSchema): MetaModelOptions {
  const unionBranches = AvroModel.type;
  // Being an array is not enough: only a 'null' branch makes the value optional.
  const isNullable =
    Array.isArray(unionBranches) && unionBranches.includes('null');

  return { isNullable };
}

/**
 * Decides whether a schema's `type` union is so broad that it should be
 * modelled as "any".
 *
 * That is the case when the union either
 *  - lists at least 8 types and none of them is `'null'`, or
 *  - lists exactly 10 types.
 *
 * A `type` that is not an array never qualifies.
 *
 * @param avroSchemaModel the Avro schema to inspect
 * @returns `true` when the schema should become an AnyModel
 */
function shouldBeAnyType(avroSchemaModel: AvroSchema): boolean {
  const unionBranches = avroSchemaModel.type;
  if (!Array.isArray(unionBranches)) {
    return false;
  }

  // Comparing the array itself against null is always true; what matters is
  // whether 'null' appears among the listed types.
  const containsAllTypesButNotNull =
    unionBranches.length >= 8 && !unionBranches.includes('null');
  const containsAllTypes = unionBranches.length === 10;

  return containsAllTypesButNotNull || containsAllTypes;
}

/**
 * Converts an Avro schema into a MetaModel.
 *
 * A schema that is already present in `alreadySeenModels` yields the cached
 * model. Otherwise the schema is offered to each specialised converter in a
 * fixed order (object, array, boolean, string, integer, float, enum, union)
 * and the first model produced wins. When no converter matches, a warning is
 * logged and an AnyModel is returned.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param alreadySeenModels cache of schemas converted so far; defaults to a new empty map
 * @returns the MetaModel representing the schema
 */
export function AvroToMetaModel(
  avroSchemaModel: AvroSchema,
  alreadySeenModels: Map<AvroSchema, MetaModel> = new Map()
): MetaModel {
  if (alreadySeenModels.has(avroSchemaModel)) {
    return alreadySeenModels.get(avroSchemaModel) as MetaModel;
  }

  const resolvedName = avroSchemaModel.name || 'undefined';
  const buildAnyModel = (): AnyModel =>
    new AnyModel(
      resolvedName,
      avroSchemaModel.originalInput,
      getMetaModelOptions(avroSchemaModel)
    );

  if (shouldBeAnyType(avroSchemaModel)) {
    return buildAnyModel();
  }

  // Evaluated lazily and in order: a converter only runs when every earlier
  // one returned undefined.
  const converters: Array<() => MetaModel | undefined> = [
    () => toObjectModel(avroSchemaModel, resolvedName, alreadySeenModels),
    () => toArrayModel(avroSchemaModel, resolvedName, alreadySeenModels),
    () => toBooleanModel(avroSchemaModel, resolvedName),
    () => toStringModel(avroSchemaModel, resolvedName),
    () => toIntegerModel(avroSchemaModel, resolvedName),
    () => toFloatModel(avroSchemaModel, resolvedName),
    () => toEnumModel(avroSchemaModel, resolvedName),
    () => toUnionModel(avroSchemaModel, resolvedName, alreadySeenModels)
  ];

  for (const convert of converters) {
    const converted = convert();
    if (converted !== undefined) {
      return converted;
    }
  }

  Logger.warn('Failed to convert to MetaModel, defaulting to AnyModel.');
  return buildAnyModel();
}

/**
 * Builds a BooleanModel for a schema whose `type` is exactly `'boolean'`.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param name name given to the resulting model
 * @returns the BooleanModel, or `undefined` for any other `type`
 */
export function toBooleanModel(
  avroSchemaModel: AvroSchema,
  name: string
): BooleanModel | undefined {
  const isBoolean = avroSchemaModel.type === 'boolean';
  return isBoolean
    ? new BooleanModel(
        name,
        avroSchemaModel.originalInput,
        getMetaModelOptions(avroSchemaModel)
      )
    : undefined;
}
/**
 * Builds an IntegerModel for a schema whose `type` is `'int'` or `'long'`.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param name name given to the resulting model
 * @returns the IntegerModel, or `undefined` for any other `type`
 */
export function toIntegerModel(
  avroSchemaModel: AvroSchema,
  name: string
): IntegerModel | undefined {
  switch (avroSchemaModel.type) {
    case 'int':
    case 'long':
      return new IntegerModel(
        name,
        avroSchemaModel.originalInput,
        getMetaModelOptions(avroSchemaModel)
      );
    default:
      return undefined;
  }
}
export function toFloatModel(
avroSchemaModel: AvroSchema,
name: string
): FloatModel | undefined {
if (avroSchemaModel.type !== 'float' && avroSchemaModel.type !== 'double') {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see my mistake 😅 — I didn't think of that.

I guess we need to add a check in every function for the case where the AvroSchema's `type` is an object, so that the value of `type` is treated as an Avro schema itself.

 if ((typeof avroSchemaModel.type !== 'string' && !Array.isArray(avroSchemaModel.type)) || (avroSchemaModel.type !== 'int' && avroSchemaModel.type !== 'long')) {
return undefined;
}

@jonaslagoni, please correct me if I am heading in the wrong direction or if there is an alternative. 🙇🏻

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is a bit hard, because we have two inputs that have a type keyword. JSON Schema and Avro, each with overlapping types such as string and boolean.

So the answer to the question lies within "what is the difference between Avro and JSON Schema and how can you easily detect it" 😄

I don't have the perfect answer here, unfortunately, other than looking at the keywords that differ between them and matching on those 🙂

Copy link
Collaborator Author

@akkshitgupta akkshitgupta Apr 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jonaslagoni How about introducing a check explicitly in the shouldProcess() function using an additional schema property with a value of Avro to differentiate an Avro Schema before processing the input?

Have you checked these comments: #1753 (comment) and #1753 (comment)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jonaslagoni How about introducing a check explicitly in the shouldProcess() function using an additional schema property with a value of Avro to differentiate an Avro Schema before processing the input?

Can you give an example of what you mean?

Copy link
Collaborator Author

@akkshitgupta akkshitgupta Apr 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Example:

{
  "name": "Person",
  "type": "int",
  "schema": "avro"
}

@jonaslagoni here, we can add a check for the schema property against the value avro. This would be to verify that only avro schema is passed to the Avro processor.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, that would never happen, because `schema` is not part of the Avro standard and probably never will be 🙂

It's okay if the solution is not bullet proof, as long as we document the edge cases.

return undefined;
}
return new FloatModel(
name,
avroSchemaModel.originalInput,
getMetaModelOptions(avroSchemaModel)
);
}
/**
 * Builds a StringModel for a schema whose `type` is exactly `'string'`.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param name name given to the resulting model
 * @returns the StringModel, or `undefined` for any other `type`
 */
export function toStringModel(
  avroSchemaModel: AvroSchema,
  name: string
): StringModel | undefined {
  if (avroSchemaModel.type === 'string') {
    const options = getMetaModelOptions(avroSchemaModel);
    return new StringModel(name, avroSchemaModel.originalInput, options);
  }
  return undefined;
}
/**
 * Builds an EnumModel for a schema whose `type` is `'enum'` and whose
 * `symbols` is an array.
 *
 * Each symbol becomes one EnumValueModel. A string symbol is used as its own
 * key; any other value is keyed by its JSON representation.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param name name given to the resulting model
 * @returns the EnumModel, or `undefined` when the schema is not an enum with a symbol list
 */
export function toEnumModel(
  avroSchemaModel: AvroSchema,
  name: string
): EnumModel | undefined {
  const symbols = avroSchemaModel.symbols;
  if (avroSchemaModel.type !== 'enum' || !Array.isArray(symbols)) {
    return undefined;
  }

  const keyFor = (value: unknown): string =>
    typeof value === 'string' ? value : JSON.stringify(value);

  const enumModel = new EnumModel(
    name,
    avroSchemaModel.originalInput,
    getMetaModelOptions(avroSchemaModel),
    []
  );

  for (const symbol of symbols) {
    enumModel.values.push(new EnumValueModel(keyFor(symbol), symbol));
  }

  return enumModel;
}
/**
 * Converts an Avro union — a schema whose `type` is an array of alternatives —
 * into a UnionModel.
 *
 * Returns `undefined` when:
 *  - `type` is not an array,
 *  - the union is just a nullable single type (exactly two alternatives, one
 *    of them `'null'`), or
 *  - `shouldBeAnyType` classifies the schema as "any".
 *
 * A null alternative is never added as a union member; it marks the union as
 * nullable instead. Every other alternative is converted with
 * `AvroToMetaModel` and appended to the union in declaration order.
 *
 * The previous bail-out condition also returned `undefined` whenever `type`
 * WAS an array, which made it impossible for this function to ever produce a
 * union. The per-primitive fallback that followed the loop could not be
 * reached either and has been removed.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param name name given to the resulting union model
 * @param alreadySeenModels cache of converted schemas; the union is registered
 * here before its members are converted so that a schema referring back to
 * itself resolves to the same model
 * @returns the UnionModel, or `undefined` when the schema is not modelled as a union
 */
// eslint-disable-next-line sonarjs/cognitive-complexity
export function toUnionModel(
  avroSchemaModel: AvroSchema,
  name: string,
  alreadySeenModels: Map<AvroSchema, MetaModel>
): UnionModel | undefined {
  const alternatives = avroSchemaModel.type;
  if (!Array.isArray(alternatives)) {
    return undefined;
  }

  // Should not create union from two types where one is null
  const containsTypeWithNull =
    alternatives.length === 2 && alternatives.includes('null');
  if (containsTypeWithNull || shouldBeAnyType(avroSchemaModel)) {
    return undefined;
  }

  const unionModel = new UnionModel(
    name,
    avroSchemaModel.originalInput,
    getMetaModelOptions(avroSchemaModel),
    []
  );

  //cache model before continuing
  if (!alreadySeenModels.has(avroSchemaModel)) {
    alreadySeenModels.set(avroSchemaModel, unionModel);
  }

  for (const alternative of alternatives) {
    // Avro writes the null branch as the bare string 'null'; a nested schema
    // whose own type is 'null' (or ['null']) is treated the same way.
    const isNullAlternative =
      alternative === 'null' ||
      alternative.type === 'null' ||
      (Array.isArray(alternative.type) &&
        alternative.type.length === 1 &&
        alternative.type.includes('null'));

    if (isNullAlternative) {
      unionModel.options.isNullable = true;
    } else {
      unionModel.union.push(AvroToMetaModel(alternative, alreadySeenModels));
    }
  }

  return unionModel;
}
/**
 * Builds an ObjectModel for a schema whose `type` is exactly `'record'`.
 *
 * The model is registered in `alreadySeenModels` (unless the schema is already
 * cached) before its fields are converted. Each entry of `fields` becomes an
 * ObjectPropertyModel whose value model comes from `AvroToMetaModel` and whose
 * required flag comes from `avroSchemaModel.isRequired`. When `extend` is
 * non-empty, each entry is converted and collected in `options.extend`.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param name name given to the resulting model
 * @param alreadySeenModels cache of schemas converted so far
 * @returns the ObjectModel, or `undefined` for any other `type`
 */
export function toObjectModel(
  avroSchemaModel: AvroSchema,
  name: string,
  alreadySeenModels: Map<AvroSchema, MetaModel>
): ObjectModel | undefined {
  if (avroSchemaModel.type !== 'record') {
    return undefined;
  }

  const recordModel = new ObjectModel(
    name,
    avroSchemaModel.originalInput,
    getMetaModelOptions(avroSchemaModel),
    {}
  );

  // Register first so nested references to this schema reuse the same model.
  if (!alreadySeenModels.has(avroSchemaModel)) {
    alreadySeenModels.set(avroSchemaModel, recordModel);
  }

  // fields: a required attribute of record and a JSON Array of JSON Objects
  const fields = avroSchemaModel.fields || [];
  for (const field of fields) {
    const required = avroSchemaModel.isRequired(field.name);
    const fieldModel = AvroToMetaModel(field, alreadySeenModels);
    recordModel.properties[String(field.name)] = new ObjectPropertyModel(
      field.name ?? '',
      required,
      fieldModel
    );
  }

  if (avroSchemaModel.extend?.length) {
    recordModel.options.extend = [];

    for (const parent of avroSchemaModel.extend) {
      const parentModel = AvroToMetaModel(parent, alreadySeenModels);
      recordModel.options.extend.push(parentModel);
    }
  }

  return recordModel;
}
/**
 * Builds an ArrayModel for a schema whose `type` includes `'array'`.
 *
 * Two shapes are handled:
 *  - `items` is not an array: the value model is the conversion of `items`,
 *    or an AnyModel placeholder when `items` is undefined.
 *  - `items` is an array: the value model is a UnionModel named `'union'`
 *    holding the conversion of every entry.
 *
 * In both cases the ArrayModel is stored in `alreadySeenModels` before the
 * items are converted.
 *
 * @param avroSchemaModel the Avro schema to convert
 * @param name name given to the resulting model
 * @param alreadySeenModels cache of schemas converted so far
 * @returns the ArrayModel, or `undefined` when `type` does not include `'array'`
 */
export function toArrayModel(
  avroSchemaModel: AvroSchema,
  name: string,
  alreadySeenModels: Map<AvroSchema, MetaModel>
): ArrayModel | undefined {
  if (!avroSchemaModel.type?.includes('array')) {
    return undefined;
  }

  const items = avroSchemaModel.items;

  //items single type = normal array
  //items not sat = normal array, any type
  if (!Array.isArray(items)) {
    const anyPlaceholder = new AnyModel(
      '',
      undefined,
      getMetaModelOptions(avroSchemaModel)
    );
    const plainArray = new ArrayModel(
      name,
      avroSchemaModel.originalInput,
      getMetaModelOptions(avroSchemaModel),
      anyPlaceholder
    );
    alreadySeenModels.set(avroSchemaModel, plainArray);

    if (items !== undefined) {
      plainArray.valueModel = AvroToMetaModel(
        items as AvroSchema,
        alreadySeenModels
      );
    }
    return plainArray;
  }

  // Several item schemas: the array holds a union of all of them.
  const itemUnion = new UnionModel(
    'union',
    avroSchemaModel.originalInput,
    getMetaModelOptions(avroSchemaModel),
    []
  );
  const unionArray = new ArrayModel(
    name,
    avroSchemaModel.originalInput,
    getMetaModelOptions(avroSchemaModel),
    itemUnion
  );
  alreadySeenModels.set(avroSchemaModel, unionArray);

  for (const item of items) {
    itemUnion.union.push(AvroToMetaModel(item, alreadySeenModels));
  }

  return unionArray;
}
1 change: 1 addition & 0 deletions src/helpers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ export * from './ConstrainHelpers';
export * from './PresetHelpers';
export * from './DependencyHelpers';
export * from './FilterHelpers';
export * from './AvroToMetaModel';
Loading
Loading