From 660df9c0000ab4d7b8a41da1a8289052dc1e6525 Mon Sep 17 00:00:00 2001
From: Kent Pitman <kent_pitman@hms.harvard.edu>
Date: Fri, 25 Aug 2023 11:42:56 -0400
Subject: [PATCH] Small cosmetic changes and some additional support for
 upcoming work.

---
 CHANGELOG.rst            |   8 ++-
 dcicutils/misc_utils.py  |  41 ++++++++++++++
 dcicutils/sheet_utils.py | 119 +++++++++++++++++++++++++++------------
 pyproject.toml           |   2 +-
 test/test_misc_utils.py  |  97 ++++++++++++++++++++++++++++---
 test/test_sheet_utils.py |  26 ++++-----
 6 files changed, 233 insertions(+), 60 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b88893ac8..79f60120f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -19,7 +19,7 @@ Change Log
 
     * Function ``load_items`` that does the same as ``ItemManager.load``.
 
-  * Various low-level implementation classes such as:
+  * Various lower-level implementation classes such as:
 
     * Classes ``XlsxManager``, ``CsvManager`` and ``TsvManager`` for loading raw data
       from ``.xlsx``, ``.csv``, and ``.tsv`` files, respectively.
@@ -27,7 +27,11 @@ Change Log
     * Classes ``XlsxItemManager``, ``CsvItemManager``, and ``TsvItemManager`` for loading Item-style data
       from ``.xlsx``, ``.csv``, and ``.tsv`` files, respectively.
 
-* Contains a fix for a bug in ``ff_utils.get_schema_names`` (`C4-1086 <https://hms-dbmi.atlassian.net/browse/C4-1086>`_).
+* New functionality in ``misc_utils``:
+
+  * New function ``is_uuid`` (migrated from Fourfront)
+  * New function ``pad_to``
+  * New class ``JsonLinesReader``
 
 
 7.9.0
diff --git a/dcicutils/misc_utils.py b/dcicutils/misc_utils.py
index aeca4a326..de188b872 100644
--- a/dcicutils/misc_utils.py
+++ b/dcicutils/misc_utils.py
@@ -9,6 +9,7 @@
 import inspect
 import math
 import io
+import json
 import os
 import logging
 import pytz
@@ -2329,3 +2330,43 @@ def parse_in_radix(text: str, *, radix: int):
     except Exception:
         pass
     raise ValueError(f"Unable to parse: {text!r}")
+
+
+def pad_to(target_size: int, data: list, *, padding=None):
+    actual_size = len(data)
+    if actual_size < target_size:
+        data = data + [padding] * (target_size - actual_size)
+    return data
+
+
+class JsonLinesReader:
+
+    def __init__(self, fp, padded=False, padding=None):
+        self.fp = fp
+        self.padded: bool = padded
+        self.padding = padding
+        self.headers = None  # Might change after we see first line
+
+    def __iter__(self):
+        first_line = True
+        n_headers = 0
+        for raw_line in self.fp:
+            line = json.loads(raw_line)
+            if first_line:
+                first_line = False
+                if isinstance(line, list):
+                    self.headers = line
+                    n_headers = len(line)
+                    continue
+            # If length of line is more than we expect, ignore the excess. Let user put comments beyond our table
+            # But if length of line is less than we expect (and padded=True), extend the line with self.padding
+            if self.headers:
+                if not isinstance(line, list):
+                    raise Exception("If the first line is a list, all lines must be.")
+                if self.padded and len(line) < n_headers:
+                    line = pad_to(n_headers, line, padding=self.padding)
+                yield dict(zip(self.headers, line))
+            elif isinstance(line, dict):
+                yield line
+            else:
+                raise Exception(f"If the first line is not a list, all lines must be dictionaries: {line!r}")
diff --git a/dcicutils/sheet_utils.py b/dcicutils/sheet_utils.py
index 9a83bf6f9..df3e16e43 100644
--- a/dcicutils/sheet_utils.py
+++ b/dcicutils/sheet_utils.py
@@ -3,18 +3,19 @@
 import csv
 import io
 import openpyxl
+import os
 import uuid
 
 from dcicutils.common import AnyJsonData
 from dcicutils.env_utils import public_env_name, EnvUtils
 from dcicutils.ff_utils import get_schema
 from dcicutils.lang_utils import conjoined_list, disjoined_list, maybe_pluralize
-from dcicutils.misc_utils import ignored, PRINT
+from dcicutils.misc_utils import ignored, PRINT, pad_to
 from dcicutils.task_utils import pmap
 from openpyxl.worksheet.worksheet import Worksheet
 from openpyxl.workbook.workbook import Workbook
 from tempfile import TemporaryFile
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Type, Union
 
 
 Header = str
@@ -334,7 +335,7 @@ def __init__(self, **kwargs):
 
     # TODO: Consider whether this should be an abstractmethod (but first see detailed design note at top of class.)
     @classmethod
-    def load(cls, filename: str) -> Dict[str, List[AnyJsonData]]:
+    def load(cls, filename: str, **kwargs) -> Dict[str, List[AnyJsonData]]:
         """
         Reads a filename and returns a dictionary that maps sheet names to rows of dictionary data.
         For more information, see documentation of AbstractTableSetManager.
@@ -354,6 +355,8 @@ class BasicTableSetManager(AbstractTableSetManager):
     of this where there's only one set of headers and only one block of content.
     """
 
+    ALLOWED_FILE_EXTENSIONS: List[str] = []
+
     def __init__(self, filename: str, **kwargs):
         super().__init__(**kwargs)
         self.filename: str = filename
@@ -387,17 +390,26 @@ def load_content(self) -> Any:
 
 
 class TableSetManager(BasicTableSetManager):
-
-    ALLOWED_FILE_EXTENSIONS = None
+    """
+    This is the base class for all things that read tablesets. Those may be:
+    * Excel workbook readers (.xlsx)
+    * Comma-separated file readers (.csv)
+    * Tab-separated file readers (.tsv in most of the world, but Microsoft stupidly calls this .txt, outright
+      refusing to write a .tsv file, so many people seem to compromise and call this .tsv.txt)
+    Unimplemented formats that could easily be made to do the same thing:
+    * JSON files
+    * JSON lines files
+    * YAML files
+    """
 
     @classmethod
-    def load(cls, filename: str) -> AnyJsonData:
+    def load(cls, filename: str, **kwargs) -> AnyJsonData:
         if cls.ALLOWED_FILE_EXTENSIONS:
             if not any(filename.lower().endswith(suffix) for suffix in cls.ALLOWED_FILE_EXTENSIONS):
                 raise LoadArgumentsError(f"The TableSetManager subclass {cls.__name__} expects only"
                                          f" {disjoined_list(cls.ALLOWED_FILE_EXTENSIONS)} filenames: {filename}")
 
-        table_set_manager: TableSetManager = cls(filename)
+        table_set_manager: TableSetManager = cls(filename, **kwargs)
         return table_set_manager.load_content()
 
     def __init__(self, filename: str, **kwargs):
@@ -432,6 +444,33 @@ def parse_cell_value(cls, value: SheetCellValue) -> AnyJsonData:
         return prefer_number(value)
 
 
+class TableSetManagerRegistry:
+
+    ALL_TABLE_SET_MANAGERS: Dict[str, Type[TableSetManager]] = {}
+
+    @classmethod
+    def register(cls, class_to_register: Type[TableSetManager]):
+        for ext in class_to_register.ALLOWED_FILE_EXTENSIONS:
+            existing = cls.ALL_TABLE_SET_MANAGERS.get(ext)
+            if existing:
+                raise Exception(f"Tried to define {class_to_register} to extension {ext},"
+                                f" but {existing} already claimed that.")
+            cls.ALL_TABLE_SET_MANAGERS[ext] = class_to_register
+        return class_to_register
+
+    @classmethod
+    def manager_for_filename(cls, filename: str) -> Type[TableSetManager]:
+        base = os.path.basename(filename)
+        dotparts = base.split('.')
+        while dotparts:
+            suffix = f".{'.'.join(dotparts)}"
+            found = cls.ALL_TABLE_SET_MANAGERS.get(suffix)
+            if found:
+                return found
+            dotparts = dotparts[1:]
+        raise LoadArgumentsError(f"Unknown file type: {filename}")
+
+
 class XlsxManager(TableSetManager):
     """
     This implements the mechanism to get a series of rows out of the sheets in an XLSX file.
@@ -484,7 +523,7 @@ class SchemaAutoloadMixin(AbstractTableSetManager):
 
     SCHEMA_CACHE = {}  # Shared cache. Do not override. Use .clear_schema_cache() to clear it.
     CACHE_SCHEMAS = True  # Controls whether we're doing caching at all
-    AUTOLOAD_SCHEMAS_DEFAULT = False
+    AUTOLOAD_SCHEMAS_DEFAULT = True
 
     def __init__(self, autoload_schemas: Optional[bool] = None, portal_env: Optional[str] = None, **kwargs):
         if portal_env is None:
@@ -592,6 +631,7 @@ def parse_cell_value(self, value: SheetCellValue) -> AnyJsonData:
         return ItemTools.parse_item_value(value, context=self._instaguid_context_table)
 
 
+@TableSetManagerRegistry.register
 class XlsxItemManager(ItemManagerMixin, XlsxManager):
     """
     This layers item-style row processing functionality on an XLSX file.
@@ -599,29 +639,35 @@ class XlsxItemManager(ItemManagerMixin, XlsxManager):
     pass
 
 
-class CsvManager(TableSetManager):
-    """
-    This implements the mechanism to get a series of rows out of the sheet in a csv file,
-    returning a result that still looks like there could have been multiple tabs.
-    """
-
-    ALLOWED_FILE_EXTENSIONS = ['.csv']
+class SingleTableMixin(AbstractTableSetManager):
 
     DEFAULT_TAB_NAME = 'Sheet1'
 
-    def __init__(self, filename: str, tab_name: Optional[str] = None, **kwargs):
-        super().__init__(filename=filename, **kwargs)
+    def __init__(self, tab_name: Optional[str] = None, **kwargs):
+        super().__init__(**kwargs)
         self.tab_name = tab_name or self.DEFAULT_TAB_NAME
 
     @property
     def tabnames(self) -> List[str]:
         return [self.tab_name]
 
+
+class CsvManager(SingleTableMixin, TableSetManager):
+    """
+    This implements the mechanism to get a series of rows out of the sheet in a csv file,
+    returning a result that still looks like there could have been multiple tabs.
+    """
+
+    ALLOWED_FILE_EXTENSIONS = ['.csv']
+
+    def __init__(self, filename: str, **kwargs):
+        super().__init__(filename=filename, **kwargs)
+
     def _get_reader_agent(self) -> CsvReader:
-        return self._get_csv_reader(self.filename)
+        return self._get_reader_agent_for_filename(self.filename)
 
     @classmethod
-    def _get_csv_reader(cls, filename) -> CsvReader:
+    def _get_reader_agent_for_filename(cls, filename) -> CsvReader:
         return csv.reader(open_text_input_file_respecting_byte_order_mark(filename))
 
     PAD_TRAILING_TABS = True
@@ -630,9 +676,8 @@ def _raw_row_generator_for_tabname(self, tabname: str) -> Iterable[SheetRow]:
         headers = self.tab_headers(tabname)
         n_headers = len(headers)
         for row_data in self.reader_agent:
-            n_cols = len(row_data)
-            if self.PAD_TRAILING_TABS and n_cols < n_headers:
-                row_data = row_data + [''] * (n_headers - n_cols)
+            if self.PAD_TRAILING_TABS:
+                row_data = pad_to(n_headers, row_data, padding='')
             yield row_data
 
     def _create_tab_processor_state(self, tabname: str) -> Headers:
@@ -647,6 +692,7 @@ def _process_row(self, tabname: str, headers: Headers, row_data: SheetRow) -> An
                 for i, row_datum in enumerate(row_data)}
 
 
+@TableSetManagerRegistry.register
 class CsvItemManager(ItemManagerMixin, CsvManager):
     """
     This layers item-style row processing functionality on a CSV file.
@@ -666,7 +712,7 @@ def __init__(self, filename: str, escaping: Optional[bool] = None, **kwargs):
         self.escaping: bool = escaping or False
 
     @classmethod
-    def _get_csv_reader(cls, filename) -> CsvReader:
+    def _get_reader_agent_for_filename(cls, filename) -> CsvReader:
         return csv.reader(open_text_input_file_respecting_byte_order_mark(filename), delimiter='\t')
 
     def parse_cell_value(self, value: SheetCellValue) -> AnyJsonData:
@@ -699,6 +745,7 @@ def expand_escape_sequences(cls, text: str) -> str:
         return s.getvalue()
 
 
+@TableSetManagerRegistry.register
 class TsvItemManager(ItemManagerMixin, TsvManager):
     """
     This layers item-style row processing functionality on a TSV file.
@@ -714,24 +761,22 @@ class ItemManager(AbstractTableSetManager):
 
     @classmethod
     def create_implementation_manager(cls, filename: str, **kwargs) -> BasicTableSetManager:
-        if filename.endswith(".xlsx"):
-            reader_agent = XlsxItemManager(filename, **kwargs)
-        elif filename.endswith(".csv"):
-            tab_name = kwargs.pop('tab_name', None)
-            reader_agent = CsvItemManager(filename, tab_name=tab_name, **kwargs)
-        elif filename.endswith(".tsv"):
-            escaping = kwargs.pop('escaping', None)
-            tab_name = kwargs.pop('tab_name', None)
-            reader_agent = TsvItemManager(filename, escaping=escaping, tab_name=tab_name, **kwargs)
-        else:
-            raise LoadArgumentsError(f"Unknown file type: {filename}")
+        reader_agent_class = TableSetManagerRegistry.manager_for_filename(filename)
+        reader_agent = reader_agent_class(filename, **kwargs)
         return reader_agent
 
     @classmethod
-    def load(cls, filename: str, tab_name: Optional[str] = None, escaping: Optional[bool] = None,
-             schemas: Optional[Dict] = None, autoload_schemas: Optional[bool] = None) -> AnyJsonData:
+    def load(cls, filename: str,
+             tab_name: Optional[str] = None,
+             escaping: Optional[bool] = None,
+             schemas: Optional[Dict] = None,
+             autoload_schemas: Optional[bool] = None,
+             **kwargs) -> Dict[str, List[AnyJsonData]]:
+        """
+        Given a filename and various options, creates the appropriate manager and returns the content it loads.
+        """
         manager = cls.create_implementation_manager(filename, tab_name=tab_name, escaping=escaping, schemas=schemas,
-                                                    autoload_schemas=autoload_schemas)
+                                                    autoload_schemas=autoload_schemas, **kwargs)
         return manager.load_content()
 
 
diff --git a/pyproject.toml b/pyproject.toml
index aaa4371f7..b3e907b9d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "7.9.0.1b2"  # to become "7.10.0"
+version = "7.9.0.1b3"  # to become "7.10.0"
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <support@4dnucleome.org>"]
 license = "MIT"
diff --git a/test/test_misc_utils.py b/test/test_misc_utils.py
index b940877f5..0017bd16e 100644
--- a/test/test_misc_utils.py
+++ b/test/test_misc_utils.py
@@ -30,12 +30,13 @@
     classproperty, classproperty_cached, classproperty_cached_each_subclass, Singleton, NamedObject, obsolete,
     ObsoleteError, CycleError, TopologicalSorter, keys_and_values_to_dict, dict_to_keys_and_values, is_c4_arn,
     deduplicate_list, chunked, parse_in_radix, format_in_radix, managed_property, future_datetime,
-    MIN_DATETIME, MIN_DATETIME_UTC, INPUT, builtin_print, map_chunked, to_camel_case,
+    MIN_DATETIME, MIN_DATETIME_UTC, INPUT, builtin_print, map_chunked, to_camel_case, pad_to, JsonLinesReader,
 )
 from dcicutils.qa_utils import (
     Occasionally, ControlledTime, override_environ as qa_override_environ, MockFileSystem, printed_output,
     raises_regexp, MockId, MockLog, input_series,
 )
+from typing import Any, Dict, List
 from unittest import mock
 
 
@@ -1094,7 +1095,7 @@ def test_lockout_manager():
 
     protected_action = "simulated action"
 
-    # The function now() will get us the time. This assure us that binding datetime.datetime
+    # The function now() will get us the time. This assures us that binding datetime.datetime
     # will not be affecting us.
     now = datetime_module.datetime.now
 
@@ -1197,7 +1198,7 @@ def test_rate_manager():
     # PyCharm thinks this is not used. -kmp 26-Jul-2020
     # r = RateManager(interval_seconds=60, safety_seconds=1, allowed_attempts=4)
 
-    # The function now() will get us the time. This assure us that binding datetime.datetime
+    # The function now() will get us the time. This assures us that binding datetime.datetime
     # will not be affecting us.
     now = datetime_module.datetime.now
 
@@ -1885,7 +1886,7 @@ def test_cached_field_mocked(self):
             assert field.get() == val5
             assert field.get() == val5
 
-            dt.sleep(self.DEFAULT_TIMEOUT)  # Fast forward to where we're going to refill again
+            dt.sleep(self.DEFAULT_TIMEOUT)  # Fast-forward to where we're going to refill again
             val6 = field.get()
             assert val6 != val5
 
@@ -2077,7 +2078,7 @@ def test_copy_json(obj):
 
 
 def test_copy_json_side_effects():
-    obj = {'foo': [1, 2, 3], 'bar': [{'x': 4, 'y': 5}, {'x': 2, 'y': 7}]}
+    obj: Dict[str, Any] = {'foo': [1, 2, 3], 'bar': [{'x': 4, 'y': 5}, {'x': 2, 'y': 7}]}
     obj_copy = copy_json(obj)
     obj['foo'][1] = 20
     obj['bar'][0]['y'] = 500  # NoQA - PyCharm wrongly fears there are type errors in this line, that it will fail.
@@ -2931,7 +2932,7 @@ class SubClock(Clock):
     assert str(exc.value) == ("The subclasses= argument to classproperty_cached.reset must not be False"
                               " because classproperty_cached does not use per-subclass caches.")
 
-    # This will clear SubClock cache, bu that's shared with the Clock cache, so both will clear.
+    # This will clear SubClock cache, but that's shared with the Clock cache, so both will clear.
     assert classproperty_cached.reset(instance_class=SubClock, attribute_name='sample') is True
 
     c_t5 = Clock.sample     # This should recompute Clock.sample cache, which is shared by SubCLock
@@ -3285,7 +3286,7 @@ def test_deduplicate_list():
     xlen = len(x)
 
     assert sorted(deduplicate_list(x)) == ['a', 'b', 'c']
-    assert len(x) == xlen  # make sure there was no side-effect to the original list
+    assert len(x) == xlen  # make sure there was no side effect to the original list
 
     y = ['a']
     y0 = deduplicate_list(y)
@@ -3495,3 +3496,85 @@ def test_map_chunked():
 
     res = map_chunked(lambda x: ''.join(x), "abcdefghij", chunk_size=4, reduce=lambda x: '.'.join(x))
     assert res == 'abcd.efgh.ij'
+
+
+def test_pad_to():
+    # Lists shorter than the target size are extended with the padding (None unless specified).
+    assert pad_to(5, []) == [None, None, None, None, None]
+    assert pad_to(5, [], padding='foo') == ['foo', 'foo', 'foo', 'foo', 'foo']
+
+    assert pad_to(5, ['x']) == ['x', None, None, None, None]
+    assert pad_to(5, ['x'], padding='foo') == ['x', 'foo', 'foo', 'foo', 'foo']
+
+    six_elements = ['a', 'b', 'c', 'd', 'e', 'f']
+    # A list already longer than the target size comes back unchanged, whatever the padding.
+    assert pad_to(5, six_elements) == six_elements
+    assert pad_to(5, six_elements, padding='foo') == six_elements
+
+
+def test_json_lines_reader_dicts():
+
+    print()  # start on a fresh line
+
+    mfs = MockFileSystem()
+
+    with mfs.mock_exists_open_remove():
+
+        item1 = {"foo": 1, "bar": 2}
+        item2 = {"foo": 3, "bar": 4}
+
+        item1_str = json.dumps(item1)
+        item2_str = json.dumps(item2)
+
+        sample_lines = [item1_str, item2_str]
+
+        sample_filename = "somefile.jsonl"
+
+        with io.open(sample_filename, 'w') as fp:
+            for line in sample_lines:
+                print(line, file=fp)
+
+        for file, content in mfs.files.items():
+            print("=" * 20, file, "=" * 20)
+            print(content.decode('utf-8'))
+            print("=" * 80)
+
+        with io.open(sample_filename) as fp:
+            assert [line for line in JsonLinesReader(fp)] == [item1, item2]
+
+
+def test_json_lines_reader_lists():
+
+    print()  # start on a fresh line
+
+    mfs = MockFileSystem()
+
+    with mfs.mock_exists_open_remove():
+
+        item1 = {"foo": 1, "bar": 2}
+        item2 = {"foo": 3, "bar": 4}
+
+        headers: List[str] = list(item1.keys())
+
+        item1_str = json.dumps([item1[header] for header in headers])
+        item2_str = json.dumps([item2[header] for header in headers])
+
+        sample_lines = [item1_str, item2_str]
+
+        sample_filename = "somefile.jsonl"
+
+        with io.open(sample_filename, 'w') as fp:
+
+            print(json.dumps(headers), file=fp)
+            for line in sample_lines:
+                print(line, file=fp)
+
+        for file, content in mfs.files.items():
+            print("=" * 20, file, "=" * 20)
+            print(content.decode('utf-8'))
+            print("=" * 80)
+
+        with io.open(sample_filename) as fp:
+            parsed = [line for line in JsonLinesReader(fp)]
+            expected = [item1, item2]
+            assert parsed == expected
diff --git a/test/test_sheet_utils.py b/test/test_sheet_utils.py
index 8557e1278..ae3096632 100644
--- a/test/test_sheet_utils.py
+++ b/test/test_sheet_utils.py
@@ -376,13 +376,13 @@ def test_xlsx_manager_load_csv():
 
 def test_xlsx_item_manager_load_content():
 
-    it = XlsxItemManager(SAMPLE_XLSX_FILE)
+    it = XlsxItemManager(SAMPLE_XLSX_FILE, autoload_schemas=False)
     assert it.load_content() == SAMPLE_XLSX_FILE_ITEM_CONTENT
 
 
 def test_xlsx_item_manager_load():
 
-    assert XlsxItemManager.load(SAMPLE_XLSX_FILE) == SAMPLE_XLSX_FILE_ITEM_CONTENT
+    assert XlsxItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False) == SAMPLE_XLSX_FILE_ITEM_CONTENT
 
 
 def test_xlsx_item_manager_load_csv():
@@ -414,19 +414,19 @@ def test_csv_manager_load_csv():
 
 def test_csv_item_manager_load_content():
 
-    it = CsvItemManager(SAMPLE_CSV_FILE)
+    it = CsvItemManager(SAMPLE_CSV_FILE, autoload_schemas=False)
     assert it.load_content() == SAMPLE_CSV_FILE_ITEM_CONTENT
 
 
 def test_csv_item_manager_load():
 
-    assert CsvItemManager.load(SAMPLE_CSV_FILE) == SAMPLE_CSV_FILE_ITEM_CONTENT
+    assert CsvItemManager.load(SAMPLE_CSV_FILE, autoload_schemas=False) == SAMPLE_CSV_FILE_ITEM_CONTENT
 
 
 def test_csv_item_manager_load_csv():
 
     with pytest.raises(LoadArgumentsError) as exc:
-        CsvItemManager.load(SAMPLE_XLSX_FILE)
+        CsvItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False)
     assert str(exc.value).startswith('The TableSetManager subclass CsvItemManager'
                                      ' expects only .csv filenames:')
 
@@ -460,30 +460,30 @@ def test_tsv_manager_load_csv():
 
 def test_tsv_item_manager_load_content():
 
-    it = TsvItemManager(SAMPLE_TSV_FILE)
+    it = TsvItemManager(SAMPLE_TSV_FILE, autoload_schemas=False)
     assert it.load_content() == SAMPLE_TSV_FILE_ITEM_CONTENT
 
 
 def test_tsv_item_manager_load():
 
-    assert TsvItemManager.load(SAMPLE_TSV_FILE) == SAMPLE_TSV_FILE_ITEM_CONTENT
+    assert TsvItemManager.load(SAMPLE_TSV_FILE, autoload_schemas=False) == SAMPLE_TSV_FILE_ITEM_CONTENT
 
 
 def test_tsv_item_manager_load_csv():
 
     with pytest.raises(LoadArgumentsError) as exc:
-        TsvItemManager.load(SAMPLE_XLSX_FILE)
+        TsvItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False)
     assert str(exc.value).startswith('The TableSetManager subclass TsvItemManager'
                                      ' expects only .tsv or .tsv.txt filenames:')
 
 
 def test_item_manager_load():
 
-    assert ItemManager.load(SAMPLE_XLSX_FILE) == SAMPLE_XLSX_FILE_ITEM_CONTENT
+    assert ItemManager.load(SAMPLE_XLSX_FILE, autoload_schemas=False) == SAMPLE_XLSX_FILE_ITEM_CONTENT
 
-    assert ItemManager.load(SAMPLE_CSV_FILE) == SAMPLE_CSV_FILE_ITEM_CONTENT
+    assert ItemManager.load(SAMPLE_CSV_FILE, autoload_schemas=False) == SAMPLE_CSV_FILE_ITEM_CONTENT
 
-    assert ItemManager.load(SAMPLE_TSV_FILE) == SAMPLE_TSV_FILE_ITEM_CONTENT
+    assert ItemManager.load(SAMPLE_TSV_FILE, autoload_schemas=False) == SAMPLE_TSV_FILE_ITEM_CONTENT
 
     with pytest.raises(LoadArgumentsError) as exc:
         ItemManager.load("something.else")
@@ -492,9 +492,9 @@ def test_item_manager_load():
 
 def test_load_items():
 
-    assert load_items(SAMPLE_XLSX_FILE) == SAMPLE_XLSX_FILE_ITEM_CONTENT
+    assert load_items(SAMPLE_XLSX_FILE, autoload_schemas=False) == SAMPLE_XLSX_FILE_ITEM_CONTENT
 
-    assert load_items(SAMPLE_CSV_FILE) == SAMPLE_CSV_FILE_ITEM_CONTENT
+    assert load_items(SAMPLE_CSV_FILE, autoload_schemas=False) == SAMPLE_CSV_FILE_ITEM_CONTENT
 
     with pytest.raises(LoadArgumentsError) as exc:
         load_items("something.else")