From 343036fd841bfccf5792e7409bee771953bc4c0c Mon Sep 17 00:00:00 2001 From: Thomas Egense Date: Wed, 16 Aug 2023 12:51:03 +0200 Subject: [PATCH] Renamed properties used for unittest to solrwayback_unittest.properties Refactored unittest loading of properties to use this property under test/resources. Added the two default properties to the bundle folder. --- src/bundle/README.md | 1 + .../properties/solrwayback.properties | 0 .../properties/solrwaybackweb.properties | 0 .../interfaces/AutoFileResolver.java | 2 +- src/test/java/README.txt | 12 ++ .../solrwayback/export/TestGenerateCSV.java | 3 +- .../export/TestGenerateLinkGraphCSV.java | 3 +- .../parsers/HtmlParserUrlRewriterTest.java | 2 +- .../parsers/ScriptRewriterTest.java | 2 +- .../solrwayback/parsers/TestExportArc.java | 3 +- .../solrwayback/parsers/TestExportWarc.java | 3 +- .../parsers/TestExportWarcStreaming.java | 4 +- .../solr/SolrGenericStreamingTest.java | 3 +- .../solrwayback/solr/UrlResolveTest.java | 3 +- .../solrwayback/util/URLAbsoluterTest.java | 2 +- .../solrwayback_unittest.properties | 114 ++++++++++++++++++ .../solrwaybackweb_unittest.properties | 99 +++++++++++++++ 17 files changed, 244 insertions(+), 12 deletions(-) rename src/{test/resources => bundle}/properties/solrwayback.properties (100%) rename src/{test/resources => bundle}/properties/solrwaybackweb.properties (100%) create mode 100644 src/test/java/README.txt create mode 100644 src/test/resources/properties/solrwayback_unittest.properties create mode 100644 src/test/resources/properties/solrwaybackweb_unittest.properties diff --git a/src/bundle/README.md b/src/bundle/README.md index 5ece1abbb..1244d6c94 100644 --- a/src/bundle/README.md +++ b/src/bundle/README.md @@ -5,3 +5,4 @@ Resources used when building the SolrWayback bundle. 
- `install SolrWayback bundle`: See install guide [SolrWayback README](https://github.com/netarchivesuite/solrwayback/blob/master/README.md/) - `indexing`: Scripts for indexing WARC files using [webarchive-discovery](https://github.com/ukwa/webarchive-discovery/) - `Changes.md`: See version history [SolrWayback](https://github.com/netarchivesuite/solrwayback/blob/master/CHANGES.md/) +- `properties`: Default properties for the SolrWayback Bundle \ No newline at end of file diff --git a/src/test/resources/properties/solrwayback.properties b/src/bundle/properties/solrwayback.properties similarity index 100% rename from src/test/resources/properties/solrwayback.properties rename to src/bundle/properties/solrwayback.properties diff --git a/src/test/resources/properties/solrwaybackweb.properties b/src/bundle/properties/solrwaybackweb.properties similarity index 100% rename from src/test/resources/properties/solrwaybackweb.properties rename to src/bundle/properties/solrwaybackweb.properties diff --git a/src/main/java/dk/kb/netarchivesuite/solrwayback/interfaces/AutoFileResolver.java b/src/main/java/dk/kb/netarchivesuite/solrwayback/interfaces/AutoFileResolver.java index 9fc1f3ffb..175bc4de6 100644 --- a/src/main/java/dk/kb/netarchivesuite/solrwayback/interfaces/AutoFileResolver.java +++ b/src/main/java/dk/kb/netarchivesuite/solrwayback/interfaces/AutoFileResolver.java @@ -184,7 +184,7 @@ private void scanRoot(Path path, Map warcs) { } String filename = pathEntry.getFileName().toString(); if (!filePattern.matcher(filename).matches()) { - log.debug("Scanner encountered non-matching file '{}'", filename); + log.trace("Scanner encountered non-matching file '{}'", filename); //spamming too much during build return; } if (warcs.containsKey(filename)) { diff --git a/src/test/java/README.txt b/src/test/java/README.txt new file mode 100644 index 000000000..a77d5bb19 --- /dev/null +++ b/src/test/java/README.txt @@ -0,0 +1,12 @@ +Information about unittests. + +Property loading. 
+For unit tests that require the properties to be initialised, load the properties like this: +PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); + +This will use the property files under test/resources/properties + +If you need a unit test with quite different properties, you can create a new property file and load that. Just be sure +to include unittest in the name of the property file. + +TODO: more documentation \ No newline at end of file diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateCSV.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateCSV.java index f413bebcb..caa36283f 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateCSV.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateCSV.java @@ -2,6 +2,7 @@ import java.io.PrintWriter; +import dk.kb.netarchivesuite.solrwayback.UnitTestUtils; import dk.kb.netarchivesuite.solrwayback.properties.PropertiesLoader; import dk.kb.netarchivesuite.solrwayback.solr.SolrStreamingExportClient; import org.apache.solr.client.solrj.SolrClient; @@ -13,7 +14,7 @@ public class TestGenerateCSV { public static void main(String[] args) throws Exception{ - PropertiesLoader.initProperties(); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); String query = "thomas egense"; String filter = null; diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateLinkGraphCSV.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateLinkGraphCSV.java index 476f64067..352018c31 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateLinkGraphCSV.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/export/TestGenerateLinkGraphCSV.java @@ -2,6 +2,7 @@ import java.io.PrintWriter; +import dk.kb.netarchivesuite.solrwayback.UnitTestUtils; import
dk.kb.netarchivesuite.solrwayback.properties.PropertiesLoader; import dk.kb.netarchivesuite.solrwayback.solr.SolrStreamingLinkGraphCSVExportClient; import org.apache.solr.client.solrj.SolrClient; @@ -13,7 +14,7 @@ public class TestGenerateLinkGraphCSV { public static void main(String[] args) throws Exception{ - PropertiesLoader.initProperties(); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); String query = "katte"; diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/HtmlParserUrlRewriterTest.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/HtmlParserUrlRewriterTest.java index 55743b5e1..5d6a3e44e 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/HtmlParserUrlRewriterTest.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/HtmlParserUrlRewriterTest.java @@ -38,7 +38,7 @@ public class HtmlParserUrlRewriterTest { public void invalidateProperties() throws Exception{ // Need this to ensure that the normaliser has a known setting - PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback.properties").getPath()); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); Normalisation.setTypeFromConfig(); // We need this so that we know what the Solr server is set to diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/ScriptRewriterTest.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/ScriptRewriterTest.java index a5ee97447..553699906 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/ScriptRewriterTest.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/ScriptRewriterTest.java @@ -32,7 +32,7 @@ public class ScriptRewriterTest { public void invalidateProperties() throws IOException { // Need this to ensure that the normaliser has a known setting - 
PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback.properties").getPath()); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); Normalisation.setTypeFromConfig(); // PropertiesLoader.initProperties(); // Also need this so that we know what the Solr server is set to diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportArc.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportArc.java index 7b340f75f..aa2529e56 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportArc.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportArc.java @@ -5,6 +5,7 @@ import java.nio.file.Paths; import java.nio.file.StandardOpenOption; +import dk.kb.netarchivesuite.solrwayback.UnitTestUtils; import dk.kb.netarchivesuite.solrwayback.interfaces.ArcSource; import dk.kb.netarchivesuite.solrwayback.properties.PropertiesLoader; import dk.kb.netarchivesuite.solrwayback.service.dto.ArcEntry; @@ -13,7 +14,7 @@ public class TestExportArc { public static void main (String[] args) throws Exception{ - PropertiesLoader.initProperties(); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); String arcFile="/media/teg/1200GB_SSD/netarkiv/0205/filedir/27119-33-20080401194737-00004-kb-prod-har-001.kb.dk.arc.gz"; diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarc.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarc.java index 31838cb67..9d3f7ed15 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarc.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarc.java @@ -8,6 +8,7 @@ import java.nio.file.StandardOpenOption; import java.util.List; +import dk.kb.netarchivesuite.solrwayback.UnitTestUtils; import dk.kb.netarchivesuite.solrwayback.interfaces.ArcSource; 
import dk.kb.netarchivesuite.solrwayback.properties.PropertiesLoader; import dk.kb.netarchivesuite.solrwayback.service.dto.ArcEntry; @@ -19,7 +20,7 @@ public class TestExportWarc { public static void main (String[] args) throws Exception{ - PropertiesLoader.initProperties(); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); SearchResult search = NetarchiveSolrClient.getInstance().search("hash:\"sha1:PROTE66RZ6GDXPZI3ZAHG6YPCXRKZMEN\"", 100000); // /netarkiv/0105/filedir/272829-30-20170318193124175-00168-sb-prod-har-001.statsbiblioteket.dk.warc.gz diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarcStreaming.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarcStreaming.java index 3c01d937e..d6a8987de 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarcStreaming.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/parsers/TestExportWarcStreaming.java @@ -36,7 +36,7 @@ public class TestExportWarcStreaming extends UnitTestUtils { @Before public void setUpProperties() throws Exception{ - PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback.properties").getPath()); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); } @Test @@ -268,7 +268,7 @@ private void assertBinaryEnding(byte[] expected, byte[] exported) { } public static void main(String[] args) throws Exception{ - PropertiesLoader.initProperties(); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); String source_file_path="/home/teg/workspace/solrwayback/storedanske_export-00000.warc"; int offset = 515818793; ArcEntry warcEntry = WarcParser.getWarcEntry(ArcSource.fromFile(source_file_path),offset); diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/SolrGenericStreamingTest.java 
b/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/SolrGenericStreamingTest.java index 3cc494c60..a41356aef 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/SolrGenericStreamingTest.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/SolrGenericStreamingTest.java @@ -14,6 +14,7 @@ */ package dk.kb.netarchivesuite.solrwayback.solr; +import dk.kb.netarchivesuite.solrwayback.UnitTestUtils; import dk.kb.netarchivesuite.solrwayback.facade.Facade; import dk.kb.netarchivesuite.solrwayback.properties.PropertiesLoader; import dk.kb.netarchivesuite.solrwayback.service.exception.InvalidArgumentServiceException; @@ -57,7 +58,7 @@ public class SolrGenericStreamingTest { public static void setUp() throws Exception { log.info("Setting up embedded server"); - PropertiesLoader.initProperties(); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); coreContainer = new CoreContainer(SOLR_HOME); coreContainer.load(); diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/UrlResolveTest.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/UrlResolveTest.java index eed185dda..a051dd3d4 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/UrlResolveTest.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/solr/UrlResolveTest.java @@ -14,6 +14,7 @@ */ package dk.kb.netarchivesuite.solrwayback.solr; +import dk.kb.netarchivesuite.solrwayback.UnitTestUtils; import dk.kb.netarchivesuite.solrwayback.parsers.HtmlParserUrlRewriter; import dk.kb.netarchivesuite.solrwayback.parsers.ParseResult; import dk.kb.netarchivesuite.solrwayback.properties.PropertiesLoader; @@ -61,7 +62,7 @@ public class UrlResolveTest { public static void setUp() throws Exception { log.info("Setting up embedded server"); - PropertiesLoader.initProperties(); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); coreContainer = new 
CoreContainer(SOLR_HOME); coreContainer.load(); diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/util/URLAbsoluterTest.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/util/URLAbsoluterTest.java index 1d35f455e..37e10f7c0 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/util/URLAbsoluterTest.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/util/URLAbsoluterTest.java @@ -28,7 +28,7 @@ public class URLAbsoluterTest { @Before public void setUpProperties() throws Exception{ - PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback.properties").getPath()); + PropertiesLoader.initProperties(UnitTestUtils.getFile("properties/solrwayback_unittest.properties").getPath()); // We need this so that we know what the Solr server is set to PropertiesLoader.WAYBACK_BASEURL = "http://localhost:0000/solrwayback/"; } diff --git a/src/test/resources/properties/solrwayback_unittest.properties b/src/test/resources/properties/solrwayback_unittest.properties new file mode 100644 index 000000000..64ae6a639 --- /dev/null +++ b/src/test/resources/properties/solrwayback_unittest.properties @@ -0,0 +1,114 @@ +##solrwayback.properties (UTF-8) + +##Url to the UKWA warc-indexer solr-server. Last part is the collection name +solr.server=http://localhost:8983/solr/netarchivebuilder/ + +#Solr caching. Defaults to false if not defined +solr.server.caching=true +solr.server.caching.max.entries=10000 +# Age based cache invalidation is not enabled per default as index watching works better for most cases +# See the description of solr.server.check.interval.seconds below for more details +#solr.server.caching.age.seconds=86400 + +# Solr availability and index change check interval: Every x seconds a query for new documents is issued. +# If an index change is detected, caches will be cleared +# +# The check is light (cached by Solr) if the index has not changed and moderate if the index has been +# changed.
If the backing index has billions of records and is continuously updated, active checking +# will strain the system. In that case it is recommended to disable active checking and use fixed time +# cache clearing with solr.server.caching.age.seconds instead. +# +# Default is 60 seconds +# Disable by setting to -1 +# If the checking is disabled, consider setting solr.server.caching.age.seconds instead +solr.server.check.interval.seconds=60 + +## Link to this webapp itself. BaseURL for link rewrites must be a full URL. +wayback.baseurl=http://localhost:8080/solrwayback/ + +#Disable playback if true. Will just show a simple page with an error message if playback is clicked. +#Will also prevent showing full size images and download of binaries. +#Thumbnail images in search results will still be shown. +playback.disabled=false + + +#Set to true to prevent SolrWayback url-hacking from accessing Warc-files+offset that are not in the Solr collection. +#This can be done if location+WARC filename+offset is known for a record. +#This will have a performance impact. Only set to true if there are other Warc-files mounted on the OS that must not be accessed. +warc.files.verify.collection=false + +# WARC files must be resolvable for playback to work. +# Plain files as well as HTTP URLs are supported. +# For the base case when WARCS have not been moved since index time, the +# RewriteLocationResolver is used with default setup. +# If WARC files are moved to another location after index, different +# implementations of ArcFileLocationResolverInterface are available.
+# +# Default resolver: Optionally rewrites the input +warc.file.resolver.class=dk.kb.netarchivesuite.solrwayback.interfaces.RewriteLocationResolver +# Default parameters for RewriteLocationResolver: Return the input path unchanged: +# warc.file.resolver.parameters.path.regexp=.* +# warc.file.resolver.parameters.path.replacement=$0 +# Sample parameters for RewriteLocationResolver that handles changed root location for WARC files, +# where the subfolder structure for the WARCs is preserved: +# warc.file.resolver.parameters.path.regexp=/home/harvester/warcs/(.*) +# warc.file.resolver.parameters.path.replacement=/warcs/$1 +# Sample parameters for RewriteLocationResolver that rewrites to a HTTP server where all WARCs are accessible +# directly under the "warcstore/" folder: +# warc.file.resolver.parameters.path.regexp=.*([^/]*) +# warc.file.resolver.parameters.path.replacement=http://example.com/warcstore/$1 +# +# Mapping resolver: Uses a map of known WARCs +# warc.file.resolver.class=dk.kb.netarchivesuite.solrwayback.interfaces.FileMovedMappingResolver +# The FileMovedMappingResolver MUST have a file containing a list of +# full file paths for known WARCs, where a sample entry in the list could be +# /storage/warcs/col1/mywarc_123.warc.gz +# warc.file.resolver.parameters=/home/user/netarkivet.files +# +# Auto discovery: Scans folders for WARCs. +# IMPORTANT: On a networked drive with millions of WARCs, the scan might take significant time +# and IO resources. Use RewriteLocationResolver or FileMovedMappingResolver where possible. +# warc.file.resolver.class=dk.kb.netarchivesuite.solrwayback.interfaces.AutoFileResolver +# The AutoFileResolver MUST have at least one root to scan from +# warc.file.resolver.parameters.autoresolver.roots=/home/sw/warcs1,/netmounts/colfoo +# Per default, the roots are only scanned on SolrWayback start. 
+# Sample config for AutoFileResolver for scanning every hour: +# warc.file.resolver.parameters.autoresolver.rescan.enabled=true +# warc.file.resolver.parameters.autoresolver.rescan.seconds=3600 + + +#Collection name. This is the name shown when exporting a page to PID-XML. +pid.collection.name=netarkivet.dk + + +#The possible values for url.normaliser are: normal, legacy and minimal. +# Only change the normaliser type if you know what you are doing. +# Only use minimal if the solr index was built with a warc-indexer earlier than 3.0. All SolrWayback bundles have warc-indexer later than this. (Playback quality is drastically reduced) +# Use Legacy for 3.0-3.1 versions of the warc-indexer. +# Use normal for all warc-indexers version 3.2.0+ +url.normaliser=normal + +# Optional list of Solr-params. Format is key1=value1;key2=value2,... +#solr.search.params=f.url_norm.qf=url + +#------- Generate preview screenshots ------------------ +#Used for preview screenshots shown on the page resources overview. Is not required. +#Chrome must be installed on the OS and headless chrome is used to generate the screenshots. +#The setup depends on the OS. + +#Linux: chrome +#Ubuntu: chrome.command=chromium-browser +#Windows: chrome.command=C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe +#MAC1: chrome.command=/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome +#MAC2: chrome.command="open -b com.google.Chrome" +#example command: chromium-browser --headless --disable-gpu --ipc-connection-timeout=3000 --screenshot=test.png --window-size=1280,1024 https://www.google.com/ +chrome.command=chromium-browser + +# This will work on linux. Create the folder yourself +screenshot.temp.imagedir=/home/xxx/solrwayback_screenshots/ +#For windows (create the folder yourself) +#screenshot.temp.imagedir=C:\\solrwayback_screenshots\\ + +#Timeout in seconds. Optional, 10 seconds is default.
+screenshot.preview.timeout=20 +#------------------------------------------------------- \ No newline at end of file diff --git a/src/test/resources/properties/solrwaybackweb_unittest.properties b/src/test/resources/properties/solrwaybackweb_unittest.properties new file mode 100644 index 000000000..fd5298f72 --- /dev/null +++ b/src/test/resources/properties/solrwaybackweb_unittest.properties @@ -0,0 +1,99 @@ +##solrwaybackweb.properties (UTF-8) + +## Url to solrwayback backend. +wayback.baseurl=http://localhost:8080/solrwayback/ + +# Only define if solrwayback is not installed right after domain url element. If it is installed as https://kb.dk/covid-collection/solrwayback , then set the property below +#webapp.prefix=/covid-collection/solrwayback/ + +## Only define a primary playback engine if you do not want to use the built-in SolrWayback playback engine. Replace {PyWb-server} with url pointing to your PyWb server +#playback.primary.engine=https://{PyWb-server}/web/ + +## Optional additional playback engine. Using collection-based playback below will overrule this value. +# To use SolrWayback as alternative playback engine use value: http://localhost:8080/solrwayback/services/web/ +#playback.alternative.engine=http://web.archive.org/web/ + +# Configure alternative playback option. This will be shown as an icon next to the title of the result. If property is not defined, the icon will not be shown +# The alternative playback engine must support the url pattern: http(s)://servername/paths../{crawltime}/{url} +# There are 4 different ways to configure alternative playback engine +# 1) Hardcoded value. (use openwayback.baseurl above). Example: +#openwayback.baseurl=http://web.archive.org/web/ +# 2) Defined for each collection, multiple values. Example: +#alternative.playback.collection.mapping=collectionname1=playbackurl1;collectionname2=playbackurl2 (Urls must end with a slash /) +# 3) Defined by collection field in solr.
Example: +#alternative.playback.collection.mapping={$collection}=http://servername1.com/pywb{$collection}/ +# 4) Defined by collection_id field in solr. Example: +#alternative.playback.collection.mapping={$collection_id}=http://servername1.com/pywb{$collection_id}/ + +#alternative.playback.collection.mapping=coronacollection=http://servername.com/pywbcorona/;examplecollection=http://servername1.com/pywbexample/ +#alternative.playback.collection.mapping={$collection}=http://servername1.com/pywb{$collection}/ + +# Playback rewrites the content for webpages, CSS-files etc. This can lead to Out Of Memory for huge files. +# This setting controls the maximum number of characters that are processed when rewriting. +# Excess characters are ignored. Default is 100MB. +warc.entry.text.max.characters=100000000 + +# Will toggle the warc,csv and zip export options. +allow.export.warc=true +allow.export.csv=true +allow.export.zip=true +# Define fields that can be selected for CSV (and also JSON/JSONL). +export.csv.fields=id,index_time, author, description,keywords,description,license_url,content,content_encoding,content_length,content_language, content_type_droid,content_type_ext,content_type_full,content_type_norm,content_type_served,content_type_tika,content_type,content_type_version,elements_used,hash,wayback_date,crawl_year,url_norm,url_path,url,url_type,domain,host,host_surt,public_suffix,resourcename,image_size,links_images,links_domains,links_hosts,links_hosts_surts,links_norm,links_public_suffixes,links,server,status_code,generator,redirect_to_norm,source_file_path,source_file_offset,source_file,text,title,type,warc_key_id,warc_ip ,ssdeep_hash_bs_3, ssdeep_hash_bs_6, ssdeep_hash_bs_12, ssdeep_hash_bs_24, ssdeep_hash_bs_48, ssdeep_hash_bs_96,ssdeep_hash_bs_192,warc_key_id + +#Limit export size +#10M for CSV , 1M for warc, 10K for warc-expanded +#For warc.expanded the total number of documents can be many times the max-results size.
+export.csv.maxresults=10000000 +export.warc.maxresults=1000000 +export.zip.maxresults=1000000 +export.warc.expanded.maxresults=10000 + +# See src/test/resources/solr/netarchivebuilder/conf/schema.xml for possible facet fields +# If the facets are not specified, the default is +# domain, content_type_norm, type, crawl_year, status_code, public_suffix +facets=domain, content_type_norm, type, crawl_year,status_code,public_suffix,status_code + +# Allowed fields to compute stats for. +stats.fields.numeric=content_length, crawl_year, content_text_length, image_height, image_width, image_size +stats.fields.all=links, domain, elements_used, content_type, content_language, links_images, type, content_length, crawl_year, content_text_length, image_height, image_width, image_size + +# Define fields to show when clicking "Show Data fields". Will default to all fields +#fields=id,index_time,author,description,keywords,content_language,content_type_norm,hash + +# About this archive. Will be shown when page is loaded and when about is clicked. +# Search help is shown when the icon next to search is clicked. +# Both properties can be changed to a full filepath with a custom text. HTML formatting allowed. +# The values below use the default text files in SolrWayback. +about.text.file=/about_this_archive.txt +search.help.text.file=/search_help.txt +collection.text.file=/about_collection.txt + +#n-gram and domain statistics etc. need a start year for the visualizations. Will default to 1998 if not defined. +archive.start.year=1998 + +#Start position for the image geo search. Radius in meters (integer) +#The coordinates below are Aarhus, Denmark +maps.latitude=56.17 +maps.longitude=10.20 +maps.radius=50000 + +leaflet.source=https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png +leaflet.attribution=© OpenStreetMap contributors + +# Wordcloud stop words.
+ # Danish +#wordcloud.stopwords=ad,af,aldrig,alle,alt,anden,andet,andre,at,bare,begge,blev,blive,bliver,da,de,dem,den,denne,der,deres,det,dette,dig,din,dine,disse,dit,dog,du,efter,ej,eller,en,end,ene,eneste,enhver,er,et,fem,fik,fire,flere,fleste,for,fordi,forrige,fra,få,får,før,god,godt,ham,han,hans,har,havde,have,hej,helt,hende,hendes,her,hos,hun,hvad,hvem,hver,hvilken,hvis,hvor,hvordan,hvorfor,hvornår,i,ikke,ind,ingen,intet,ja,jeg,jer,jeres,jo,kan,kom,komme,kommer,kun,kunne,lad,lav,lidt,lige,lille,man,mange,med,meget,men,mens,mere,mig,min,mine,mit,mod,må,ned,nej,ni,nogen,noget,nogle,nu,ny,nyt,når,nær,næste,næsten,og,også,okay,om,op,os,otte,over,på,se,seks,selv,ser,ses,sig,sige,sin,sine,sit,skal,skulle,som,stor,store,syv,så,sådan,tag,tage,thi,ti,til,to,tre,ud,under,var,ved,vi,vil,ville,vor,vores,være,været,alene,allerede,alligevel,altid,bag,blandt,burde,bør,dens,derefter,derfor,derfra,deri,dermed,derpå,derved,egen,ellers,endnu,ens,enten,flest,foran,først,gennem,gjorde,gjort,gør,gøre,gørende,hel,heller,hen,henover,herefter,heri,hermed,herpå,hvilke,hvilkes,hvorefter,hvorfra,hvorhen,hvori,hvorimod,hvorved,igen,igennem,imellem,imens,imod,indtil,langs,lave,lavet,ligesom,længere,mellem,mest,mindre,mindst,måske,nemlig,nogensinde,nok,omkring,overalt,samme,sammen,selvom,senere,siden,stadig,synes,syntes,således,temmelig,tidligere,tilbage,tit,uden,udover,undtagen,via,vore,vær,øvrigt + # English
+wordcloud.stopwords=i,me,my,myself,we,our,ours,ourselves,you,your,yours,yourself,yourselves,he,him,his,himself,she,her,hers,herself,it,its,itself,they,them,their,theirs,themselves,what,which,who,whom,this,that,these,those,am,is,are,was,were,be,been,being,have,has,had,having,do,does,did,doing,a,an,the,and,but,if,or,because,as,until,while,of,at,by,for,with,about,against,between,into,through,during,before,after,above,below,to,from,up,down,in,out,on,off,over,under,again,further,then,once,here,there,when,where,why,how,all,any,both,each,few,more,most,other,some,such,no,nor,not,only,own,same,so,than,too,very,s,t,can,will,just,don,should,now + +#Disable option to search by uploaded file. +search.uploaded.file.disabled=false +#Number of entries by page for the search (default 20) +search.pagination=20 + +# Show a custom image in top left corner. (png,jpeg,svg etc.). Use full path to image file on OS to change the default value +# Link when clicking the logo +top.left.logo.image=/kb_logo_desktop_blue.svg +top.left.logo.image.link=https://www.kb.dk/ \ No newline at end of file