From 412c4b08922e8749593fb9ebad08517f8d09340c Mon Sep 17 00:00:00 2001 From: ebocher Date: Mon, 20 May 2024 17:48:19 +0200 Subject: [PATCH] Read the cpg file attached to the shapefile --- .../functions/io/dbf/DBFDriverFunction.java | 2 + .../io/dbf/internal/DbaseFileHeader.java | 50 +++++++++++++++--- .../functions/io/shp/internal/SHPDriver.java | 13 +++++ .../h2gis/functions/io/shp/SHPEngineTest.java | 28 ++++++++++ .../functions/io/shp/urock_buildings.cpg | 1 + .../functions/io/shp/urock_buildings.dbf | Bin 0 -> 4080 bytes .../functions/io/shp/urock_buildings.prj | 1 + .../functions/io/shp/urock_buildings.shp | Bin 0 -> 404 bytes .../functions/io/shp/urock_buildings.shx | Bin 0 -> 116 bytes 9 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.cpg create mode 100644 h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.dbf create mode 100644 h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.prj create mode 100644 h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.shp create mode 100644 h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.shx diff --git a/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/DBFDriverFunction.java b/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/DBFDriverFunction.java index 8200aa06e1..233a77c898 100644 --- a/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/DBFDriverFunction.java +++ b/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/DBFDriverFunction.java @@ -229,6 +229,8 @@ public String[] importFile(Connection connection, String tableReference, File stmt.execute("DROP TABLE IF EXISTS " + outputTable); stmt.close(); } + //Check if a cpg file exists + //TODO: DBFDriver dbfDriver = new DBFDriver(); dbfDriver.initDriverFromFile(fileName, options); DbaseFileHeader dbfHeader = dbfDriver.getDbaseFileHeader(); 
diff --git a/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/internal/DbaseFileHeader.java b/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/internal/DbaseFileHeader.java index eed31274f8..08565c3a08 100644 --- a/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/internal/DbaseFileHeader.java +++ b/h2gis-functions/src/main/java/org/h2gis/functions/io/dbf/internal/DbaseFileHeader.java @@ -503,15 +503,49 @@ public void readHeader(FileChannel channel,String forceEncoding) throws IOExcept // skip / skip thesreserved bytes in the header. in.skip(17); - // read Language driver - byte lngDriver = in.get(); - String encoding = CODE_PAGE_ENCODING.get(lngDriver); - if(encoding!=null && forceEncoding == null) { - this.fileEncoding = encoding; - } - // skip reserved - in.skip(2); + // read Language driver + byte lngDriver = in.get(); + String encoding = CODE_PAGE_ENCODING.get(lngDriver); + // skip reserved + in.skip(2); + //FROM : https://github.com/OSGeo/gdal/blob/master/ogr/ogrsf_frmts/shape/ogrshapelayer.cpp#L526 + if(forceEncoding!=null && !forceEncoding.trim().isEmpty()){ + // Normalize the requested value; Locale.ROOT keeps the upper-casing locale independent + String cpg = forceEncoding.trim().toUpperCase(java.util.Locale.ROOT); + // A bare number is a code page; any other value is matched by name below + int codepage = -1; + try { + codepage = Integer.parseInt(cpg); + }catch (NumberFormatException ignored){ + // Not numeric: fall through to the name based checks + } + if ((codepage >= 437 && codepage <= 950) || (codepage >= 1250 && codepage <= 1258)) { + this.fileEncoding = "CP"+codepage; + } + else if (cpg.startsWith("8859")) + { + // "8859-1" and "88591" both designate ISO-8859-1 + this.fileEncoding = "ISO-8859-"+cpg.substring(cpg.startsWith("-", 4) ? 5 : 4); + } + else if (cpg.startsWith("UTF-8")||cpg.equals("UTF8")) + { + this.fileEncoding ="UTF-8"; + } + else if (cpg.startsWith( "ANSI 1251")) { + this.fileEncoding = "CP1251"; + } + else + { + // Try just using the CPG value directly. Works for stuff like Big5. 
+ this.fileEncoding = forceEncoding.trim(); + } + }else { + // No usable forced encoding: rely on the language driver id read from the header + if(encoding!=null){ + this.fileEncoding = encoding; + } + } // calculate the number of Fields in the header fieldCnt = (headerLength - FILE_DESCRIPTOR_SIZE - 1) / FILE_DESCRIPTOR_SIZE; diff --git a/h2gis-functions/src/main/java/org/h2gis/functions/io/shp/internal/SHPDriver.java b/h2gis-functions/src/main/java/org/h2gis/functions/io/shp/internal/SHPDriver.java index cafc44d0a7..938866bd34 100644 --- a/h2gis-functions/src/main/java/org/h2gis/functions/io/shp/internal/SHPDriver.java +++ b/h2gis-functions/src/main/java/org/h2gis/functions/io/shp/internal/SHPDriver.java @@ -32,6 +32,7 @@ import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; /** * Merge ShapeFileReader and DBFReader. @@ -57,6 +58,7 @@ public class SHPDriver implements FileDriver { private int geometryFieldIndex = 0; private ShapeType shapeType; public File prjFile; + public File cpgFile; private int srid =0; @@ -168,6 +170,9 @@ else if(path.equals(fileNamePrefix+".dbf")){ else if(path.equals(fileNamePrefix+".prj")){ prjFile = entry.toFile(); return true; + } else if(path.equals(fileNamePrefix+".cpg")){ + cpgFile = entry.toFile(); + return true; } return false; } @@ -180,6 +185,14 @@ else if(path.equals(fileNamePrefix+".prj")){ } } if(dbfFile != null) { + //Read the CPG file if it exists, an explicitly forced encoding takes precedence + if(cpgFile!=null && forceEncoding==null){ + //try-with-resources closes the reader even when readLine fails + try (BufferedReader br = Files.newBufferedReader(cpgFile.toPath())) { + String codePage = br.readLine(); + forceEncoding = codePage==null || codePage.trim().isEmpty() ? null : codePage.trim(); + } + } dbfDriver.initDriverFromFile(dbfFile, forceEncoding); } else { throw new IllegalArgumentException("DBF File not found"); diff --git a/h2gis-functions/src/test/java/org/h2gis/functions/io/shp/SHPEngineTest.java b/h2gis-functions/src/test/java/org/h2gis/functions/io/shp/SHPEngineTest.java index de3c7e67d4..fe8fe519ee 100644 --- 
a/h2gis-functions/src/test/java/org/h2gis/functions/io/shp/SHPEngineTest.java +++ b/h2gis-functions/src/test/java/org/h2gis/functions/io/shp/SHPEngineTest.java @@ -449,4 +449,32 @@ public void linkedShpSpatialIndexFlatQueryTest() throws SQLException { assertTrue(rs.getString(1).contains("PK_INDEX"), "Expected contains PK_INDEX but result is " + rs.getString(1)); } } + + @Test + public void readSHPWithCPGFileTest() throws SQLException { + try (Statement st = connection.createStatement()) { + st.execute("drop table if exists shptable"); + st.execute("CALL FILE_TABLE('"+SHPEngineTest.class.getResource("urock_buildings.shp").getPath()+"', 'SHPtable');"); + try (ResultSet rs = st.executeQuery("SELECT * FROM SHPTABLE where objektiden= '71193131-5b61-4d65-a661-fefb173bfd86';")) { + assertTrue(rs.next()); + assertEquals("71193131-5b61-4d65-a661-fefb173bfd86", rs.getString("objektiden")); + assertEquals("Bostad;Småhus friliggande",rs.getString("andamal1")); + } + st.execute("drop table shptable"); + } + } + + @Test + public void readSHPWithCPGFordeTest() throws SQLException { + try (Statement st = connection.createStatement()) { + st.execute("drop table if exists shptable"); + st.execute("CALL SHPREAD( '"+SHPEngineTest.class.getResource("urock_buildings.shp").getPath()+"', 'SHPtable','windows-1252');"); + try (ResultSet rs = st.executeQuery("SELECT * FROM SHPTABLE where objektiden= '3110e3c4-638c-485e-b5fe-a827ebafd071';")) { + assertTrue(rs.next()); + assertEquals("3110e3c4-638c-485e-b5fe-a827ebafd071", rs.getString("objektiden")); + assertEquals("Industri;Tillverkning",rs.getString("andamal1")); + } + st.execute("drop table shptable"); + } + } } diff --git a/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.cpg b/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.cpg new file mode 100644 index 0000000000..03ce6dfae7 --- /dev/null +++ 
b/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.cpg @@ -0,0 +1 @@ +1252 \ No newline at end of file diff --git a/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.dbf b/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.dbf new file mode 100644 index 0000000000000000000000000000000000000000..4439ae5561de3f7d0766bb0cb9ac9d25a00c3eb0 GIT binary patch literal 4080 zcmeHKJx{|h5Dg%KgoIcas!knP)B2pmN!tx3B-DlOl~X&Zo5Zc+q*DJc{026J+ceOR z2z7vk=9Y8r`7Y1T!;>#YSIu+7FwV~3FLgSIK(MklSS{|#uT7r$Qyyi4acMlRO80yC zf~QK%WGLbSZ|vbQ4Y`^r8gXMizj3>}p~Oc}jpwPFr@0JcQOy6?Z8r*jD;7onbtRrH z=dv0XQqd$9GE~*`{bd+R%9Nx(NqA~h^PA+0oUMmc_fyGe zLSxX!UjP5=Ha9xlv<{+cg9U)NHMUJ+d1KS|9B%qfz)k95m-{qe$OVi5I>_o^tA)It zMS3=B6XcZRP#Y1n9UB0*o$V?@&s1i~YsOQ-v$FXFGZbgWh~OcOA}z`ZiclZ9R-jg( z1p_`kVHutz%T<<&{!qlRw(dx3|NUz{2nNRm=vvzROvfi++Kf0RC3+R`zz5gz1Ll!! z+w1m_(?f0>aJQsa+g_=}arxW!ptil^B&&DT3j9k2PTck$W-6ns|B}4vM_(&QMJ&Ql Lzu)*TUUu^nyD%|6 literal 0 HcmV?d00001 diff --git a/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.prj b/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.prj new file mode 100644 index 0000000000..2ab2e0977a --- /dev/null +++ b/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.prj @@ -0,0 +1 @@ +PROJCS["SWEREF99_12_00",GEOGCS["GCS_SWEREF99",DATUM["D_SWEREF99",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",150000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",12.0],PARAMETER["Scale_Factor",1.0],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.shp b/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.shp new file mode 100644 index 
0000000000000000000000000000000000000000..ab30efe20246f7fd679cd86868a98bc7c5c256dd GIT binary patch literal 404 zcmZQzQ0HR64oQF^j~(ec5XqfO$e5st5(t&luY$msaH`Ln1TkdIwE ziYOzH=KwNmam<8W;Xu77uY?LqV=)V)5Tp(SVD?<%m;C@z%PqsKu?|Tu%q(?J9ZOlD zygQ58np`Azq3Z`4$ppj!*!(m}x_^rkP;bAXN-)SSkUQ9c7zCcwF`F_W$-&gBxV&F2 z2~@l6b<__lp!`M0wFw}8Q}WYOnm~TXTJfC{Kz>?;&u2~`9~OSF@LZGSs|8YzZax72 C6?Rtu literal 0 HcmV?d00001 diff --git a/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.shx b/h2gis-functions/src/test/resources/org/h2gis/functions/io/shp/urock_buildings.shx new file mode 100644 index 0000000000000000000000000000000000000000..58dad8d0e9de58c412fa8c7f8a760c393c5f1889 GIT binary patch literal 116 zcmZQzQ0HR64y;}~E$j7c7 PMbrq$a{%HpAPxWkmeCSt literal 0 HcmV?d00001