From f1b1073587a897e9d6035a401673622fb55962a7 Mon Sep 17 00:00:00 2001 From: Mikael Hirki Date: Sun, 27 Oct 2024 16:08:10 +0200 Subject: [PATCH] Add deduplication of captions by building a set, which has superior performance for large datasets. --- helpers/prompts.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/helpers/prompts.py b/helpers/prompts.py index 0060d6cd..bac03d77 100644 --- a/helpers/prompts.py +++ b/helpers/prompts.py @@ -462,6 +462,9 @@ def get_all_captions( # allow caching of multiple captions, if returned by the backend. captions.extend(caption) + # Deduplicate captions + captions = list(set(captions)) + return captions @staticmethod