From 463b2f68439cbc7e8458476392aa40dc1c7b2186 Mon Sep 17 00:00:00 2001
From: blankie
Date: Sat, 27 Jan 2024 08:49:45 +1100
Subject: [PATCH] Initialize WARC dedup table when CDX dedup file is being read

---
 src/warc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/warc.c b/src/warc.c
index 311fe7b4..5d5f2445 100644
--- a/src/warc.c
+++ b/src/warc.c
@@ -1260,10 +1260,6 @@ warc_start_new_file (bool meta)
 
   warc_current_file_number++;
 
-  /* init the hash table */
-  warc_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
-                                     warc_cmp_sha1_digest);
-
   base_filename_length = strlen (opt.warc_filename);
   /* filename format: base + "-" + 5 digit serial number + ".warc.zst" */
   new_filename = xmalloc (base_filename_length + 1 + 5 + 9 + 1);
@@ -1651,6 +1647,10 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
     {
       int nrecords;
 
+      /* init the hash table */
+      warc_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
+                                         warc_cmp_sha1_digest);
+
       /* Load CDX data into the table. */
       do