Skip to content

Commit

Permalink
Merge pull request #10295 from drons/mitabUTF8
Browse files Browse the repository at this point in the history
MITAB: Disable table fields "laundering" for non-neutral charset and add UTF-8 charset
  • Loading branch information
rouault authored Jun 30, 2024
2 parents 6445593 + a02dab3 commit 8f5d4de
Show file tree
Hide file tree
Showing 13 changed files with 225 additions and 41 deletions.
3 changes: 3 additions & 0 deletions autotest/ogr/data/mitab/utf8.mid
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"Значение А","Значение Б","Значение В","Значение Г","Значение Д"
"Значение 1","Значение 2","Значение 3","Значение 4","Значение 5"
"Полигон","Синий","Заливка","А а Б б","ЪЫЁЩ"
31 changes: 31 additions & 0 deletions autotest/ogr/data/mitab/utf8.mif
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Version 1520
Charset "UTF-8"
Delimiter ","
CoordSys Earth Projection 8, 1001, "m", 39, 0, 1, 7500000, 0 Bounds (-749281.53901, -10002137.4978) (15749281.539, 10002137.4978)
Columns 5
Поле_А Char(10)
Поле_Б Char(10)
Поле_В Char(10)
Поле_Г Char(10)
Поле_Д Char(10)
Data

Point 7404648.72 6144520.22
Symbol (35,16711680,12)
Pline 4
7404638.32 6144512.27
7404646.55 6144515.77
7404653.33 6144520.94
7404657.51 6144525.21
Pen (2,2,65280)
Region 1
6
7404649.37 6144522.85
7404646.78 6144518.96
7404642.44 6144519.88
7404642.59 6144522.85
7404645.79 6144523.76
7404649.37 6144522.85
Pen (1,2,0)
Brush (2,16777215,16777215)
Center 7404645.9 6144521.36
59 changes: 58 additions & 1 deletion autotest/ogr/ogr_mitab.py
Original file line number Diff line number Diff line change
Expand Up @@ -1980,7 +1980,7 @@ def test_ogr_mitab_45(tmp_vsimem, frmt, lyrCount):
# Test read MapInfo layers with encoding specified


@pytest.mark.parametrize("fname", ("tab-win1251.TAB", "win1251.mif"))
@pytest.mark.parametrize("fname", ("tab-win1251.TAB", "win1251.mif", "utf8.mif"))
def test_ogr_mitab_46(fname):

fldNames = ["Поле_А", "Поле_Б", "Поле_В", "Поле_Г", "Поле_Д"]
Expand Down Expand Up @@ -2268,6 +2268,63 @@ def test_ogr_mitab_tab_write_field_name_with_dot(tmp_vsimem):
ds = None


###############################################################################


@pytest.mark.parametrize("ext", ["mif", "tab"])
def test_ogr_mitab_write_utf8_field_name(tmp_vsimem, ext):
    """Write a layer whose field name is non-ASCII with ENCODING=UTF-8,
    then reopen the file and check the field is readable under the same name."""

    field_name = "地市"
    tmpfile = tmp_vsimem / f"ogr_mitab_tab_write_utf8_field_name.{ext}"
    creation_options = ["ENCODING=UTF-8", f"FORMAT={ext}"]

    # Create the dataset with a single integer field and one point feature.
    driver = ogr.GetDriverByName("MapInfo File")
    ds = driver.CreateDataSource(tmpfile, options=creation_options)
    lyr = ds.CreateLayer("test")
    lyr.CreateField(ogr.FieldDefn(field_name, ogr.OFTInteger))
    feat = ogr.Feature(lyr.GetLayerDefn())
    feat[field_name] = 1
    feat.SetGeometryDirectly(ogr.CreateGeometryFromWkt("POINT(2 3)"))
    lyr.CreateFeature(feat)
    # Dropping the dataset reference is done with errors silenced.
    with gdal.quiet_errors():
        ds = None

    # Read back: the field must still be addressable by its UTF-8 name.
    ds = ogr.Open(tmpfile)
    lyr = ds.GetLayer(0)
    feat = lyr.GetNextFeature()
    assert feat[field_name] == 1
    ds = None


###############################################################################


@pytest.mark.parametrize("ext", ["mif", "tab"])
@pytest.mark.parametrize("dsStrictOpt", [False, True])
def test_ogr_mitab_non_strict_fields_laundering(tmp_vsimem, ext, dsStrictOpt):
    """Check STRICT_FIELDS_NAME_LAUNDERING=NO, given either as a dataset
    creation option (dsStrictOpt=True) or as a layer creation option
    (dsStrictOpt=False)."""

    laundering_off = "STRICT_FIELDS_NAME_LAUNDERING=NO"
    written_name = "dot.and space"

    tmpfile = tmp_vsimem / f"ogr_mitab_non_strict_fields_laundering.{ext}"
    # The option goes to exactly one of the two option lists.
    dsOpt = [f"FORMAT={ext}"] + ([laundering_off] if dsStrictOpt else [])
    lyrOpt = [] if dsStrictOpt else [laundering_off]

    driver = ogr.GetDriverByName("MapInfo File")
    ds = driver.CreateDataSource(tmpfile, options=dsOpt)
    lyr = ds.CreateLayer("test", options=lyrOpt)
    lyr.CreateField(ogr.FieldDefn(written_name, ogr.OFTInteger))
    feat = ogr.Feature(lyr.GetLayerDefn())
    feat[written_name] = 1
    feat.SetGeometryDirectly(ogr.CreateGeometryFromWkt("POINT(2 3)"))
    lyr.CreateFeature(feat)
    # Dropping the dataset reference is done with errors silenced.
    with gdal.quiet_errors():
        ds = None

    # On re-read the dot is expected to be kept while the space has
    # become an underscore.
    ds = ogr.Open(tmpfile)
    lyr = ds.GetLayer(0)
    feat = lyr.GetNextFeature()
    assert feat["dot.and_space"] == 1
    ds = None


###############################################################################
# Test read text labels with local encoding from mif/mid file

Expand Down
1 change: 1 addition & 0 deletions doc/source/drivers/vector/mapinfo_encodings.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"ENCODING value","MapInfo charset",description
""""" (empty string)",Neutral,"No character conversions performed."
"UTF-8","UTF-8","UTF-8 (Works with recent MapInfo versions, since v15.2)"
"ISO-8859-1","ISO8859_1","ISO 8859-1 (UNIX)"
"ISO-8859-2","ISO8859_2","ISO 8859-2 (UNIX)"
"ISO-8859-3","ISO8859_3","ISO 8859-3 (UNIX)"
Expand Down
16 changes: 16 additions & 0 deletions doc/source/drivers/vector/mitab.rst
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@ The following dataset creation options are supported:
bytes. Any MapInfo version should be able to handle block sizes from
512 to 32256.

- .. dsco:: STRICT_FIELDS_NAME_LAUNDERING
:choices: YES, NO
:default: YES
:since: 3.10

Replaces all non-alphanumeric characters in the dataset's field names with
`_` (underscore). Can be set to `NO` for recent MapInfo versions.

Layer Creation Options
~~~~~~~~~~~~~~~~~~~~~~

Expand Down Expand Up @@ -166,6 +174,14 @@ The following layer creation options are supported:
TAB format). Friendly names can be up to 256 characters long and can include
most ASCII characters. Supported by MapInfo Pro v15.0 or higher.

- .. lco:: STRICT_FIELDS_NAME_LAUNDERING
:choices: YES, NO
:default: YES
:since: 3.10

Replaces all non-alphanumeric characters in the layer's field names with
`_` (underscore). Can be set to `NO` for recent MapInfo versions.

Configuration options
~~~~~~~~~~~~~~~~~~~~~

Expand Down
5 changes: 4 additions & 1 deletion ogr/ogrsf_frmts/mitab/mitab.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class IMapInfoFile CPL_NON_FINAL : public OGRLayer
GBool m_bBoundsSet;

char *m_pszCharset;
bool m_bStrictLaundering = true;
std::set<CPLString> m_oSetFields{};
TABFeature *CreateTABFeature(OGRFeature *poFeature);

Expand Down Expand Up @@ -200,6 +201,7 @@ class IMapInfoFile CPL_NON_FINAL : public OGRLayer

void SetEncoding(const char *);
const char *GetEncoding() const;
virtual void SetStrictLaundering(bool);
int TestUtf8Capability() const;
CPLString NormalizeFieldName(const char *pszName) const;
///////////////
Expand Down Expand Up @@ -403,6 +405,7 @@ class TABFile final : public IMapInfoFile

int WriteFeature(TABFeature *poFeature);
virtual int SetCharset(const char *pszCharset) override;
virtual void SetStrictLaundering(bool bStrictLaundering) override;
#ifdef DEBUG
virtual void Dump(FILE *fpOut = nullptr) override;
#endif
Expand Down Expand Up @@ -906,7 +909,7 @@ class MIFFile final : public IMapInfoFile
/* { return m_poMAPFile->GetHeaderBlock()->SetProjInfo( poPI ); }*/
virtual int SetMIFCoordSys(const char *pszMIFCoordSys) override;
virtual int SetCharset(const char *pszCharset) override;

virtual void SetStrictLaundering(bool bStrictLaundering) override;
#ifdef DEBUG
virtual void Dump(FILE * /*fpOut*/ = nullptr) override
{
Expand Down
6 changes: 6 additions & 0 deletions ogr/ogrsf_frmts/mitab/mitab_imapinfofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ static const char *const apszCharsets[][2] = {
{"CodePage869", "CP869"}, // DOS Code Page 869 = Modern Greek
{"LICS", ""}, // Lotus worksheet release 1,2 character set
{"LMBCS", ""}, // Lotus worksheet release 3,4 character set
{"UTF-8", "UTF-8"},
{nullptr, nullptr}};

const char *IMapInfoFile::CharsetToEncoding(const char *pszCharset)
Expand Down Expand Up @@ -683,6 +684,11 @@ void IMapInfoFile::SetEncoding(const char *pszEncoding)
SetCharset(EncodingToCharset(pszEncoding));
}

void IMapInfoFile::SetStrictLaundering(bool bStrictLaundering)
{
m_bStrictLaundering = bStrictLaundering;
}

int IMapInfoFile::TestUtf8Capability() const
{
const char *pszEncoding(GetEncoding());
Expand Down
16 changes: 15 additions & 1 deletion ogr/ogrsf_frmts/mitab/mitab_miffile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,8 @@ int MIFFile::WriteMIFHeader()
if (strlen(GetEncoding()) > 0)
osFieldName.Recode(CPL_ENC_UTF8, GetEncoding());

char *pszCleanName = TABCleanFieldName(osFieldName);
char *pszCleanName =
TABCleanFieldName(osFieldName, GetEncoding(), m_bStrictLaundering);
osFieldName = pszCleanName;
CPLFree(pszCleanName);

Expand Down Expand Up @@ -1946,9 +1947,22 @@ int MIFFile::SetCharset(const char *pszCharset)
{
m_poMIFFile->SetEncoding(CharsetToEncoding(pszCharset));
}
if (EQUAL(pszCharset, "UTF-8"))
{
m_nVersion = std::max(m_nVersion, 1520);
}
return 0;
}

/************************************************************************/
/*                   MIFFile::SetStrictLaundering()                     */
/*                                                                      */
/*  Record the laundering mode in the base class and, when strict       */
/*  laundering is turned off, make sure the file version is at least    */
/*  1520 (presumably the MapInfo 15.2 format level - to be confirmed).  */
/************************************************************************/
void MIFFile::SetStrictLaundering(bool bStrictLaundering)
{
    IMapInfoFile::SetStrictLaundering(bStrictLaundering);

    // Strict mode places no extra requirement on the version.
    if (bStrictLaundering)
        return;

    // Never lower a version that is already higher.
    m_nVersion = std::max(m_nVersion, 1520);
}

/************************************************************************/
/* MIFFile::GetSpatialRef() */
/************************************************************************/
Expand Down
14 changes: 12 additions & 2 deletions ogr/ogrsf_frmts/mitab/mitab_ogr_datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ int OGRTABDataSource::Create(const char *pszName, char **papszOptions)
IMapInfoFile *poFile = nullptr;
const char *pszEncoding(CSLFetchNameValue(papszOptions, "ENCODING"));
const char *pszCharset(IMapInfoFile::EncodingToCharset(pszEncoding));

bool bStrictLaundering = CPLTestBool(CSLFetchNameValueDef(
papszOptions, "STRICT_FIELDS_NAME_LAUNDERING", "YES"));
if (m_bCreateMIF)
{
poFile = new MIFFile(this);
Expand All @@ -158,7 +159,7 @@ int OGRTABDataSource::Create(const char *pszName, char **papszOptions)
}
poFile = poTabFile;
}

poFile->SetStrictLaundering(bStrictLaundering);
m_nLayerCount = 1;
m_papoLayers = static_cast<IMapInfoFile **>(CPLMalloc(sizeof(void *)));
m_papoLayers[0] = poFile;
Expand Down Expand Up @@ -312,6 +313,14 @@ OGRTABDataSource::ICreateLayer(const char *pszLayerName,
const char *pszEncoding = CSLFetchNameValue(papszOptions, "ENCODING");
const char *pszCharset(IMapInfoFile::EncodingToCharset(pszEncoding));
const char *pszDescription(CSLFetchNameValue(papszOptions, "DESCRIPTION"));
const char *pszStrictLaundering =
CSLFetchNameValue(papszOptions, "STRICT_FIELDS_NAME_LAUNDERING");
if (pszStrictLaundering == nullptr)
{
pszStrictLaundering = CSLFetchNameValueDef(
m_papszOptions, "STRICT_FIELDS_NAME_LAUNDERING", "YES");
}
bool bStrictLaundering = CPLTestBool(pszStrictLaundering);

if (m_bSingleFile)
{
Expand Down Expand Up @@ -376,6 +385,7 @@ OGRTABDataSource::ICreateLayer(const char *pszLayerName,
}

poFile->SetDescription(poFile->GetName());
poFile->SetStrictLaundering(bStrictLaundering);

// Assign the coordinate system (if provided) and set
// reasonable bounds.
Expand Down
6 changes: 6 additions & 0 deletions ogr/ogrsf_frmts/mitab/mitab_ogr_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ void RegisterOGRTAB()
"description='Friendly name of table. Only for tab "
"format.'/>" // See
// https://support.pitneybowes.com/SearchArticles/VFP05_KnowledgeWithSidebarHowTo?id=kA180000000CtuHCAS&popup=false&lang=en_US
" <Option name='STRICT_FIELDS_NAME_LAUNDERING' type='boolean' "
"default='YES' description='Field name consisting of alphanumeric "
"only, maximum length 31'/>"
"</LayerCreationOptionList>");

poDriver->SetMetadataItem(
Expand All @@ -248,6 +251,9 @@ void RegisterOGRTAB()
" <Option name='ENCODING' type='string' description='to override the "
"encoding interpretation of the DAT/MID with any encoding supported by "
"CPLRecode or to \"\" to avoid any recoding (Neutral charset)'/>"
" <Option name='STRICT_FIELDS_NAME_LAUNDERING' type='boolean' "
"default='YES' description='Field name consisting of alphanumeric "
"only, maximum length 31'/>"
"</CreationOptionList>");

poDriver->SetMetadataItem(
Expand Down
16 changes: 15 additions & 1 deletion ogr/ogrsf_frmts/mitab/mitab_tabfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,7 +1189,8 @@ int TABFile::WriteTABFile()
if (strlen(GetEncoding()) > 0)
osFieldName.Recode(CPL_ENC_UTF8, GetEncoding());

char *pszCleanName = TABCleanFieldName(osFieldName);
char *pszCleanName = TABCleanFieldName(
osFieldName, GetEncoding(), m_bStrictLaundering);
osFieldName = pszCleanName;
CPLFree(pszCleanName);

Expand Down Expand Up @@ -1769,9 +1770,22 @@ int TABFile::SetCharset(const char *pszCharset)
{
m_poMAPFile->SetEncoding(CharsetToEncoding(pszCharset));
}
if (EQUAL(pszCharset, "UTF-8"))
{
m_nVersion = std::max(m_nVersion, 1520);
}
return 0;
}

/**********************************************************************
 *                   TABFile::SetStrictLaundering()
 *
 * Record the laundering mode in the base class and, when strict
 * laundering is turned off, make sure the file version is at least
 * 1520 (presumably the MapInfo 15.2 format level - to be confirmed).
 **********************************************************************/
void TABFile::SetStrictLaundering(bool bStrictLaundering)
{
    IMapInfoFile::SetStrictLaundering(bStrictLaundering);

    // Strict mode places no extra requirement on the version.
    if (bStrictLaundering)
        return;

    // Never lower a version that is already higher.
    m_nVersion = std::max(m_nVersion, 1520);
}

/**********************************************************************
* TABFile::CreateFeature()
*
Expand Down
Loading

0 comments on commit 8f5d4de

Please sign in to comment.