-
Notifications
You must be signed in to change notification settings - Fork 322
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Added delete document functionality (#464)
* feat: Added delete document functionality. Added delete functionality for all types of documents (Files, Texts, Q&A and Websites). The feature deletes the documents from the S3 upload bucket, the S3 processed bucket, the DynamoDB documents table and the OpenSearch index, and also updates the DynamoDB workspaces table. The major code changes are: 1. Added a delete button to the UI for each row of the documents. 2. Added a confirmation dialog via a Modal so that the user can Cancel/Delete the document from there. 3. Created an AWS Step Functions state machine for the delete document workflow. This way, the whole process is organised and is automatically rolled back if any operation in the step function fails. The major components and how they work are as follows: 1. documents-tab.tsx has the functionality related to the delete button and the handling of the confirmation Modal. 2. documents-client.ts has the function deleteDocument, which calls the backend API. 3. The delete_document function in lib/chatbot-api/functions/api-handler/routes/documents.py handles the API request. 4. deleteDocumentWorkflow is created in lib/rag-engines/workspaces/index.ts. 5. delete-document.ts has the internal structure of the delete document workflow. 6. The Lambda function that handles the workflow is written in lib/rag-engines/workspaces/functions/delete-document-workflow/delete/index.py. 7. The execution of the state machine starts in the delete_document function of lib/shared/layers/python-sdk/python/genai_core/documents.py. 8. The actual deletion of documents happens in the delete_open_search_document function of lib/shared/layers/python-sdk/python/genai_core/opensearch/delete.py. The request flow is: documents-client -> documents.py (api handler) -> documents.py (genai_core) -> index.py (delete-document-workflow) -> delete.py (genai_core/opensearch). As part of this change, the version of opensearch-py was also updated. The update was initially made because earlier versions did not allow calling HTTP methods directly, although calling HTTP methods directly later turned out not to be required.
The version update was kept for future use, as it has no impact. * Added missing cursor commit for Aurora DB --------- Co-authored-by: Bigad Soleiman <[email protected]>
- Loading branch information
1 parent
3cacec0
commit 06aed3b
Showing
19 changed files
with
711 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
import * as cdk from "aws-cdk-lib"; | ||
import * as iam from "aws-cdk-lib/aws-iam"; | ||
import * as lambda from "aws-cdk-lib/aws-lambda"; | ||
import * as logs from "aws-cdk-lib/aws-logs"; | ||
import * as sfn from "aws-cdk-lib/aws-stepfunctions"; | ||
import * as tasks from "aws-cdk-lib/aws-stepfunctions-tasks"; | ||
import { Construct } from "constructs"; | ||
import * as path from "path"; | ||
import { Shared } from "../../shared"; | ||
import { SystemConfig } from "../../shared/types"; | ||
import { AuroraPgVector } from "../aurora-pgvector"; | ||
import { DataImport } from "../data-import"; | ||
import { KendraRetrieval } from "../kendra-retrieval"; | ||
import { OpenSearchVector } from "../opensearch-vector"; | ||
import { RagDynamoDBTables } from "../rag-dynamodb-tables"; | ||
import { RemovalPolicy } from "aws-cdk-lib"; | ||
|
||
/**
 * Construction properties for the {@link DeleteDocument} construct.
 *
 * The three retrieval-engine props are optional; the construct only wires up
 * permissions and environment values for the engines that are provided.
 */
export interface DeleteDocumentProps {
  /** System-wide configuration. */
  readonly config: SystemConfig;

  /**
   * Shared infrastructure: the VPC, Lambda runtime/architecture, common
   * layers, code bundling helper and default environment variables.
   */
  readonly shared: Shared;

  /** Data import resources; supplies the upload and processing buckets. */
  readonly dataImport: DataImport;

  /** Workspaces and documents DynamoDB tables plus their index names. */
  readonly ragDynamoDBTables: RagDynamoDBTables;

  /** Aurora pgvector engine, when deployed (database secret and connections). */
  readonly auroraPgVector?: AuroraPgVector;

  /** OpenSearch vector engine, when deployed (collection and access policy). */
  readonly openSearchVector?: OpenSearchVector;

  /** Kendra retrieval engine, when deployed (S3 data source bucket). */
  readonly kendraRetrieval?: KendraRetrieval;
}
|
||
/**
 * Provisions the delete-document workflow.
 *
 * The construct creates:
 *  - a Lambda function (`DeleteDocumentFunction`) that performs the deletion,
 *    with access to the upload/processing buckets, the workspaces and
 *    documents tables and, when present, the Aurora, OpenSearch and Kendra
 *    resources;
 *  - a Step Functions state machine that marks the document as `deleting`,
 *    invokes the Lambda, and on any Lambda task error marks the document as
 *    `error` before failing the execution.
 *
 * The state machine input is expected to carry `workspace_id` and
 * `document_id` at the top level, since both DynamoDB updates read them from
 * `$.workspace_id` / `$.document_id`.
 */
export class DeleteDocument extends Construct {
  /** The delete-document state machine created by this construct. */
  public readonly stateMachine?: sfn.StateMachine;

  constructor(scope: Construct, id: string, props: DeleteDocumentProps) {
    super(scope, id);

    // Maximum duration of one run of the delete Lambda. The state machine
    // timeout further down is derived from this value so that the execution
    // cannot be aborted while a single full-length Lambda run is in progress.
    const deleteFunctionTimeout = cdk.Duration.minutes(15);

    // Only defined when the Aurora engine (and its generated secret) exists.
    const auroraSecretArn = props.auroraPgVector?.database.secret?.secretArn;

    const deleteFunction = new lambda.Function(this, "DeleteDocumentFunction", {
      vpc: props.shared.vpc,
      code: props.shared.sharedCode.bundleWithLambdaAsset(
        path.join(__dirname, "./functions/delete-document-workflow/delete")
      ),
      runtime: props.shared.pythonRuntime,
      architecture: props.shared.lambdaArchitecture,
      handler: "index.lambda_handler",
      layers: [props.shared.powerToolsLayer, props.shared.commonLayer],
      timeout: deleteFunctionTimeout,
      logRetention: logs.RetentionDays.ONE_WEEK,
      environment: {
        ...props.shared.defaultEnvironmentVariables,
        // Set the secret id only when there is one, instead of asserting a
        // possibly-undefined value to `string`.
        ...(auroraSecretArn !== undefined
          ? { AURORA_DB_SECRET_ID: auroraSecretArn }
          : {}),
        UPLOAD_BUCKET_NAME: props.dataImport.uploadBucket.bucketName,
        PROCESSING_BUCKET_NAME: props.dataImport.processingBucket.bucketName,
        WORKSPACES_TABLE_NAME:
          props.ragDynamoDBTables.workspacesTable.tableName,
        WORKSPACES_BY_OBJECT_TYPE_INDEX_NAME:
          props.ragDynamoDBTables.workspacesByObjectTypeIndexName,
        DOCUMENTS_TABLE_NAME: props.ragDynamoDBTables.documentsTable.tableName,
        DOCUMENTS_BY_COMPOUND_KEY_INDEX_NAME:
          props.ragDynamoDBTables.documentsByCompoundKeyIndexName ?? "",
        DEFAULT_KENDRA_S3_DATA_SOURCE_BUCKET_NAME:
          props.kendraRetrieval?.kendraS3DataSourceBucket?.bucketName ?? "",
        OPEN_SEARCH_COLLECTION_ENDPOINT:
          props.openSearchVector?.openSearchCollectionEndpoint ?? "",
      },
    });

    // Aurora: allow reading the database secret and connecting on the
    // database's default port.
    if (props.auroraPgVector) {
      props.auroraPgVector.database.secret?.grantRead(deleteFunction);
      props.auroraPgVector.database.connections.allowDefaultPortFrom(
        deleteFunction
      );
    }

    // OpenSearch: IAM permissions on the collection plus an entry in the
    // collection's data access policy for the function's role.
    if (props.openSearchVector) {
      deleteFunction.addToRolePolicy(
        new iam.PolicyStatement({
          actions: [
            "aoss:APIAccessAll",
            "aoss:DescribeIndex",
            "aoss:UpdateIndex",
          ],
          resources: [props.openSearchVector.openSearchCollection.attrArn],
        })
      );

      props.openSearchVector.addToAccessPolicy(
        "delete-document",
        [deleteFunction.role?.roleArn],
        [
          "aoss:DescribeIndex",
          "aoss:UpdateIndex",
          "aoss:ReadDocument",
          "aoss:WriteDocument",
        ]
      );
    }

    // Storage and table permissions needed by the delete Lambda.
    props.dataImport.uploadBucket.grantReadWrite(deleteFunction);
    props.dataImport.processingBucket.grantReadWrite(deleteFunction);
    props.kendraRetrieval?.kendraS3DataSourceBucket?.grantReadWrite(
      deleteFunction
    );
    props.ragDynamoDBTables.workspacesTable.grantReadWriteData(deleteFunction);
    props.ragDynamoDBTables.documentsTable.grantReadWriteData(deleteFunction);

    // Primary key of the document row, resolved from the execution input.
    // Both status updates below address the same item.
    const documentKey = {
      workspace_id: tasks.DynamoAttributeValue.fromString(
        sfn.JsonPath.stringAt("$.workspace_id")
      ),
      document_id: tasks.DynamoAttributeValue.fromString(
        sfn.JsonPath.stringAt("$.document_id")
      ),
    };

    // Error path: flag the document as "error", then fail the execution.
    const handleError = new tasks.DynamoUpdateItem(this, "HandleError", {
      table: props.ragDynamoDBTables.documentsTable,
      key: documentKey,
      updateExpression: "set #status = :error",
      expressionAttributeNames: {
        "#status": "status",
      },
      expressionAttributeValues: {
        ":error": tasks.DynamoAttributeValue.fromString("error"),
      },
    }).next(
      new sfn.Fail(this, "Fail", {
        cause: "Document deletion failed",
      })
    );

    // First step: flag the document as "deleting". The update result is
    // discarded so the original input is passed on to the Lambda unchanged.
    const setDeleting = new tasks.DynamoUpdateItem(this, "SetDeleting", {
      table: props.ragDynamoDBTables.documentsTable,
      key: documentKey,
      updateExpression: "set #status=:statusValue",
      expressionAttributeNames: {
        "#status": "status",
      },
      expressionAttributeValues: {
        ":statusValue": tasks.DynamoAttributeValue.fromString("deleting"),
      },
      resultPath: sfn.JsonPath.DISCARD,
    });

    // Second step: run the delete Lambda. Any task error is routed to the
    // error path; the error is stored under $.deleteResult so that
    // $.workspace_id and $.document_id remain available to HandleError.
    const deleteTask = new tasks.LambdaInvoke(this, "Delete", {
      lambdaFunction: deleteFunction,
      resultPath: "$.deleteResult",
    }).addCatch(handleError, {
      errors: ["States.ALL"],
      resultPath: "$.deleteResult",
    });

    const workflow = setDeleting
      .next(deleteTask)
      .next(new sfn.Succeed(this, "Success"));

    const logGroup = new logs.LogGroup(this, "DeleteDocumentSMLogGroup", {
      removalPolicy: RemovalPolicy.DESTROY,
    });

    const stateMachine = new sfn.StateMachine(this, "DeleteDocument", {
      definitionBody: sfn.DefinitionBody.fromChainable(workflow),
      // Must exceed the Lambda timeout: an execution-level timeout aborts the
      // run without going through the HandleError catch, which would leave
      // the document stuck in the "deleting" status.
      timeout: deleteFunctionTimeout.plus(cdk.Duration.minutes(5)),
      comment: "Delete Document Workflow",
      tracingEnabled: true,
      logs: {
        destination: logGroup,
        level: sfn.LogLevel.ALL,
      },
    });

    this.stateMachine = stateMachine;
  }
}
32 changes: 32 additions & 0 deletions
32
lib/rag-engines/workspaces/functions/delete-document-workflow/delete/index.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import genai_core.types | ||
import genai_core.workspaces | ||
import genai_core.documents | ||
import genai_core.aurora.delete | ||
import genai_core.opensearch.delete | ||
import genai_core.kendra.delete | ||
from aws_lambda_powertools import Logger | ||
from aws_lambda_powertools.utilities.typing import LambdaContext | ||
|
||
logger = Logger() | ||
|
||
|
||
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context: LambdaContext):
    """Delete one document using the delete routine of its workspace's engine.

    Reads ``workspace_id`` and ``document_id`` from ``event``, loads the
    workspace and the document, and calls the engine-specific delete function
    with ``(workspace_id, document)``.

    Raises:
        genai_core.types.CommonError: if the workspace or the document is not
            found, or if the workspace engine is not one of ``opensearch``,
            ``aurora`` or ``kendra``.
    """
    workspace_id = event["workspace_id"]
    document_id = event["document_id"]

    workspace = genai_core.workspaces.get_workspace(workspace_id)
    if workspace is None:
        raise genai_core.types.CommonError("Workspace not found")

    document = genai_core.documents.get_document(workspace_id, document_id)
    if document is None:
        raise genai_core.types.CommonError("Document not found")

    # Engine name -> delete routine for that engine.
    delete_by_engine = {
        "opensearch": genai_core.opensearch.delete.delete_open_search_document,
        "aurora": genai_core.aurora.delete.delete_aurora_document,
        "kendra": genai_core.kendra.delete.delete_kendra_document,
    }

    delete_document = delete_by_engine.get(workspace["engine"])
    if delete_document is None:
        raise genai_core.types.CommonError("Workspace engine not supported")

    delete_document(workspace_id, document)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.