Skip to content

Commit

Permalink
Initialize WARC dedup table when CDX dedup file is being read
Browse files: browse the repository at this point in the history.
  • Loading branch information
the-blank-x committed Jan 29, 2024
1 parent cb35e76 commit 463b2f6
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/warc.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -1260,10 +1260,6 @@ warc_start_new_file (bool meta)

warc_current_file_number++;

/* init the hash table */
warc_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
warc_cmp_sha1_digest);

base_filename_length = strlen (opt.warc_filename);
/* filename format: base + "-" + 5 digit serial number + ".warc.zst" */
new_filename = xmalloc (base_filename_length + 1 + 5 + 9 + 1);
Expand Down Expand Up @@ -1651,6 +1647,10 @@ _("CDX file does not list record ids. (Missing column 'u'.)\n"));
{
int nrecords;

/* init the hash table */
warc_dedup_table = hash_table_new (1000, warc_hash_sha1_digest,
warc_cmp_sha1_digest);

/* Load CDX data into the table. */

do
Expand Down

0 comments on commit 463b2f6

Please sign in to comment.