From 50af0d3110562ed18811282bbc51a3b7ed6ebee6 Mon Sep 17 00:00:00 2001 From: skorper Date: Tue, 8 Mar 2022 17:35:24 -0800 Subject: [PATCH 1/3] Updated deletebyquery tool to improve speed --- tools/deletebyquery/deletebyquery.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/deletebyquery/deletebyquery.py b/tools/deletebyquery/deletebyquery.py index 8ed70ddc..c5ff28ef 100644 --- a/tools/deletebyquery/deletebyquery.py +++ b/tools/deletebyquery/deletebyquery.py @@ -23,7 +23,7 @@ from cassandra.auth import PlainTextAuthProvider import cassandra.concurrent from cassandra.cluster import Cluster -from cassandra.policies import RoundRobinPolicy, TokenAwarePolicy +from cassandra.policies import RoundRobinPolicy, TokenAwarePolicy, WhiteListRoundRobinPolicy from solrcloudpy import SolrConnection, SearchOptions from six.moves import input @@ -75,7 +75,8 @@ def delete_by_query(args): se = SearchOptions() se.commonparams.q(args.query) \ .fl(SOLR_UNIQUE_KEY) \ - .fl('id') + .fl('id') \ + .rows(50000) for fq in args.filterquery if args.filterquery is not None else []: se.commonparams.fq(fq) @@ -89,11 +90,11 @@ def delete_by_query(args): else: raise RuntimeError("either query or jsonparams is required") - if check_query(query): + if args.force or check_query(query): logging.info("Collecting tiles ....") solr_docs = do_solr_query(query) - if confirm_delete(len(solr_docs)): + if args.force or confirm_delete(len(solr_docs)): deleted_ids = do_delete(solr_docs, query) logging.info("Deleted tile IDs %s" % json.dumps([str(doc_id) for doc_id in deleted_ids], indent=2)) else: @@ -160,16 +161,15 @@ def do_solr_query(query): break else: next_cursor_mark = solr_response.result.nextCursorMark - - doc_ids.extend([uuid.UUID(doc['id']) for doc in solr_response.result.response.docs]) + ids = [uuid.UUID(doc['id']) for doc in solr_response.result.response.docs] + delete_from_cassandra(ids) + doc_ids.extend(ids) return doc_ids def do_delete(doc_ids, 
query): - logging.info("Executing Cassandra delete...") - delete_from_cassandra(doc_ids) - logging.info("Executing Solr delete...") + logging.info("Executing delete...") delete_from_solr(query) return doc_ids @@ -260,6 +260,11 @@ def parse_args(): choices=['1', '2', '3', '4', '5'], default='3') + parser.add_argument('-f', '--force', + help='Skip all confirmation prompts and delete without asking.', + required=False, + action='store_true') + return parser.parse_args() From 0936182aef23988d3636965eac203b416932d83a Mon Sep 17 00:00:00 2001 From: skorper Date: Tue, 8 Mar 2022 17:37:33 -0800 Subject: [PATCH 2/3] Updated deletebyquery README --- tools/deletebyquery/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/deletebyquery/README.md b/tools/deletebyquery/README.md index 97cf4db9..f2528fba 100644 --- a/tools/deletebyquery/README.md +++ b/tools/deletebyquery/README.md @@ -22,4 +22,6 @@ To delete a dataset called `my_dataset`, with SDAP deployed using the Helm chart ``` cd /incubator-sdap-nexus/tools/deletebyquery python deletebyquery.py --solr sdap-solr-svc:8983 --cassandra sdap-cassandra --cassandraUsername cassandra --cassandraPassword cassandra --query 'dataset_s:"my_dataset"' -``` \ No newline at end of file +``` + +You can provide a flag `-f` or `--force` which will cause the script to skip all prompts before deleting.
\ No newline at end of file From 395d5b9eac0ce012c6d17b1d3df94eb0bec5e095 Mon Sep 17 00:00:00 2001 From: skorper Date: Tue, 8 Mar 2022 17:38:16 -0800 Subject: [PATCH 3/3] Removed unused import --- tools/deletebyquery/deletebyquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deletebyquery/deletebyquery.py b/tools/deletebyquery/deletebyquery.py index c5ff28ef..f35e5258 100644 --- a/tools/deletebyquery/deletebyquery.py +++ b/tools/deletebyquery/deletebyquery.py @@ -23,7 +23,7 @@ from cassandra.auth import PlainTextAuthProvider import cassandra.concurrent from cassandra.cluster import Cluster -from cassandra.policies import RoundRobinPolicy, TokenAwarePolicy, WhiteListRoundRobinPolicy +from cassandra.policies import RoundRobinPolicy, TokenAwarePolicy from solrcloudpy import SolrConnection, SearchOptions from six.moves import input