From 4215d0d4cd148c1aaf79a7c691404815abe8d51a Mon Sep 17 00:00:00 2001 From: Guillaume Masclet Date: Tue, 5 Nov 2024 18:12:16 +0100 Subject: [PATCH] LKE-11926: Finer grained entity resolution ingestion --- src/api/entityResolution/index.ts | 56 +++++++++++++++++++++++++++---- src/api/entityResolution/types.ts | 2 +- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/api/entityResolution/index.ts b/src/api/entityResolution/index.ts index f966e5f3..3728384a 100644 --- a/src/api/entityResolution/index.ts +++ b/src/api/entityResolution/index.ts @@ -13,7 +13,7 @@ import { DeleteEntityResolutionMappingParams, EntityResolutionMapping, IngestionStatus, - StartIngestionParams, + StartEntityResolutionTaskParams, UpdateEntityResolutionMappingParams } from './types'; @@ -30,7 +30,7 @@ const { export class EntityResolutionAPI extends Request { /** - * Create a new entity resolution mapping. + * Create a new entity resolution mapping, for a given node category. */ createEntityResolutionMapping(params: CreateEntityResolutionMappingParams) { return this.request({ @@ -42,7 +42,7 @@ export class EntityResolutionAPI extends Request { } /** - * Update an existing entity resolution mapping. + * Update an existing entity resolution mapping, for a given node category. */ updateEntityResolutionMapping(params: UpdateEntityResolutionMappingParams) { return this.request({ @@ -54,7 +54,7 @@ export class EntityResolutionAPI extends Request { } /** - * Delete an existing entity resolution mapping. + * Delete an existing entity resolution mapping, for a given node category. */ deleteEntityResolutionMapping(params: DeleteEntityResolutionMappingParams) { return this.request({ @@ -81,9 +81,16 @@ export class EntityResolutionAPI extends Request { } /** - * Start the entity resolution ingestion on a given data-source. + * Start a full ingestion task on a given data-source. This task: + * - Ensures all the graph indexes needed for entity resolution are created. 
+ * - Fetches all the graph nodes for each mapped category. + * - Converts each fetched node into an entity resolution record. + * - Sends all the converted records to the entity resolution server. + * - Materializes the resolved entities in the graph database. + * + * By default, returns immediately, without waiting for the task to complete. */ - startIngestion(params: StartIngestionParams) { + startFullIngestion(params: StartEntityResolutionTaskParams) { return this.request({ errors: [UNAUTHORIZED, FORBIDDEN, DATA_SOURCE_UNAVAILABLE, ILLEGAL_SOURCE_STATE], url: '/:sourceKey/entityResolution', @@ -92,6 +99,43 @@ export class EntityResolutionAPI extends Request { }); } + /** + * Start an incremental ingestion task on a given data-source. This task is similar to a full + * ingestion task, but it only fetches the graph nodes that have been modified after the latest + * ingestion. + * + * This task fails if: + * - Incremental ingestion is not configured for the data-source. + * - The ingestion state is not `done` (a full ingestion has to be completed first). + * + * By default, returns immediately, without waiting for the task to complete. + */ + startIncrementalIngestion(params: StartEntityResolutionTaskParams) { + return this.request({ + errors: [UNAUTHORIZED, FORBIDDEN, DATA_SOURCE_UNAVAILABLE, ILLEGAL_SOURCE_STATE], + url: '/:sourceKey/entityResolution', + method: 'PATCH', + params: params + }); + } + + /** + * Start a purge task for a given data-source. This task: + * - Removes all the entity nodes/edges in the graph database. + * - Deletes all the graph indexes related to entity resolution. + * - Empties the data-source in the entity-resolution server. + * + * By default, returns immediately, without waiting for the task to complete. 
+ */ + startPurge(params: StartEntityResolutionTaskParams) { + return this.request({ + errors: [UNAUTHORIZED, FORBIDDEN, DATA_SOURCE_UNAVAILABLE, ILLEGAL_SOURCE_STATE], + url: '/:sourceKey/entityResolution', + method: 'DELETE', + params: params + }); + } + /** * Get the status of the entity resolution ingestion, for a given data-source. */ diff --git a/src/api/entityResolution/types.ts b/src/api/entityResolution/types.ts index d92d128e..ea0ed88e 100644 --- a/src/api/entityResolution/types.ts +++ b/src/api/entityResolution/types.ts @@ -163,7 +163,7 @@ export type RecordAddressAttribute = (typeof RECORD_ADDRESS_ATTRIBUTES)[number]; export const RECORD_PHONE_ATTRIBUTES = ['number', 'fromDate', 'thruDate'] as const; export type RecordPhoneAttribute = (typeof RECORD_PHONE_ATTRIBUTES)[number]; -export interface StartIngestionParams extends IDataSourceParams { +export interface StartEntityResolutionTaskParams extends IDataSourceParams { waitForCompletion?: boolean; }