From f40cf24cb11f5365062e44ee0b06e73e5bce61ee Mon Sep 17 00:00:00 2001 From: Maxim Moinat Date: Mon, 11 Feb 2019 15:47:19 +0100 Subject: [PATCH 1/3] quick fix for fake generation bug --- .../fakeDataGenerator/FakeDataGenerator.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java index 0f18aa4c..8cb9be33 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java @@ -39,7 +39,7 @@ public class FakeDataGenerator { private RichConnection connection; // private DbType dbType; private int targetType; - private OneToManySet primaryKeyToValues; + private OneToManySet primaryKeyToValues; private int maxRowsPerTable = 1000; private static int REGULAR = 0; @@ -98,22 +98,22 @@ public void generateData(DbSettings dbSettings, int maxRowsPerTable, String file } private void findValuesForPrimaryKeys(Database database) { - Set primaryKeys = new HashSet(); + Set primaryKeys = new HashSet<>(); for (Table table : database.getTables()) { for (Field field : table.getFields()) { if (field.getValueCounts()[0][0].equals("List truncated...")) { - primaryKeys.add(field.getName()); + primaryKeys.add(field); } } } - primaryKeyToValues = new OneToManySet(); + primaryKeyToValues = new OneToManySet<>(); for (Table table : database.getTables()) { for (Field field : table.getFields()) { - if (primaryKeys.contains(field.getName()) && !field.getValueCounts()[0][0].equals("List truncated...")) { + if (primaryKeys.contains(field) && !field.getValueCounts()[0][0].equals("List truncated...")) { for (int i = 0; i < field.getValueCounts().length; i++) if (!field.getValueCounts()[i][0].equals("") && !field.getValueCounts()[i][0].equals("List truncated...")) - primaryKeyToValues.put(field.getName(), field.getValueCounts()[i][0]); + primaryKeyToValues.put(field, field.getValueCounts()[i][0]); } } } @@ -223,7 +223,7 @@ public ValueGenerator(Field field) { String[][] valueCounts = field.getValueCounts(); type = field.getType(); if (valueCounts[0][0].equals("List truncated...")) { - Set values = primaryKeyToValues.get(field.getName()); + Set values = primaryKeyToValues.get(field); if (values.size() != 0) { this.values = convertToArray(values); cursor = 0; From 8cdd92e6629ad3e861e8c279202bdf3deefe5a86 Mon Sep 17 00:00:00 2001 From: Maxim Moinat Date: Tue, 12 Feb 2019 11:29:23 +0100 Subject: [PATCH 2/3] simplified logic for generating random values by removing redundant primaryKeyToValues --- .../fakeDataGenerator/FakeDataGenerator.java | 35 ++----------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java index 8cb9be33..4fd6075a 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java @@ -39,7 +39,6 @@ public class FakeDataGenerator { private RichConnection connection; // private DbType dbType; private int targetType; - private OneToManySet primaryKeyToValues; private int maxRowsPerTable = 1000; private static int REGULAR = 0; @@ -69,7 +68,6 @@ public void generateData(DbSettings dbSettings, int maxRowsPerTable, String file StringUtilities.outputWithTime("Starting creation of fake data"); System.out.println("Loading scan report from " + filename); Database database = Database.generateModelFromScanReport(filename); - findValuesForPrimaryKeys(database); if (targetType == DbSettings.DATABASE) { connection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType); @@ -97,28 +95,6 @@ public void generateData(DbSettings dbSettings, int maxRowsPerTable, String file StringUtilities.outputWithTime("Done"); } - private void findValuesForPrimaryKeys(Database database) { - Set primaryKeys = new HashSet<>(); - for (Table table : database.getTables()) { - for (Field field : table.getFields()) { - if (field.getValueCounts()[0][0].equals("List truncated...")) { - primaryKeys.add(field); - } - } - } - - primaryKeyToValues = new OneToManySet<>(); - for (Table table : database.getTables()) { - for (Field field : table.getFields()) { - if (primaryKeys.contains(field) && !field.getValueCounts()[0][0].equals("List truncated...")) { - for (int i = 0; i < field.getValueCounts().length; i++) - if (!field.getValueCounts()[i][0].equals("") && !field.getValueCounts()[i][0].equals("List truncated...")) - primaryKeyToValues.put(field, field.getValueCounts()[i][0]); - } - } - } - } - private List generateRows(Table table) { String[] fieldNames = new String[table.getFields().size()]; ValueGenerator[] valueGenerators = new ValueGenerator[table.getFields().size()]; @@ -223,15 +199,8 @@ public ValueGenerator(Field field) { String[][] valueCounts = field.getValueCounts(); type = field.getType(); if (valueCounts[0][0].equals("List truncated...")) { - Set values = primaryKeyToValues.get(field); - if (values.size() != 0) { - this.values = convertToArray(values); - cursor = 0; - generatorType = PRIMARY_KEY; - } else { - length = field.getMaxLength(); - generatorType = RANDOM; - } + length = field.getMaxLength(); + generatorType = RANDOM; } else { int length = valueCounts.length; if (valueCounts[length - 1][1].equals("")) // Last value could be "List truncated..." From 6b0004be3adb74fbef86ea70e318d973e5a55b7f Mon Sep 17 00:00:00 2001 From: Maxim Moinat Date: Tue, 24 Sep 2019 14:11:57 +0200 Subject: [PATCH 3/3] fix generation if source table is empty --- .../fakeDataGenerator/FakeDataGenerator.java | 25 ++++--------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java index 4fd6075a..e0d353c7 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java @@ -18,18 +18,15 @@ package org.ohdsi.whiteRabbit.fakeDataGenerator; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; import java.util.Random; import java.util.Set; -import org.ohdsi.databases.DbType; import org.ohdsi.databases.RichConnection; import org.ohdsi.rabbitInAHat.dataModel.Database; import org.ohdsi.rabbitInAHat.dataModel.Field; import org.ohdsi.rabbitInAHat.dataModel.Table; import org.ohdsi.utilities.StringUtilities; -import org.ohdsi.utilities.collections.OneToManySet; import org.ohdsi.utilities.files.Row; import org.ohdsi.utilities.files.WriteCSVFileWithHeader; import org.ohdsi.whiteRabbit.DbSettings; @@ -37,7 +34,6 @@ public class FakeDataGenerator { private RichConnection connection; - // private DbType dbType; private int targetType; private int maxRowsPerTable = 1000; @@ -45,24 +41,8 @@ public class FakeDataGenerator { private static int RANDOM = 1; private static int PRIMARY_KEY = 2; - public static void main(String[] args) { - FakeDataGenerator fakeDataGenerator = new FakeDataGenerator(); - - DbSettings dbSettings = new DbSettings(); - dbSettings.dataType = DbSettings.DATABASE; - dbSettings.dbType = DbType.POSTGRESQL; - dbSettings.server = "127.0.0.1/ohdsi"; - dbSettings.database = "ars"; - dbSettings.user = "postgres"; - dbSettings.password = "F1r3starter"; - - fakeDataGenerator.generateData(dbSettings, 100000, "c:/temp/ScanReport.xlsx", "c:/temp"); - // fakeDataGenerator.generateData(dbSettings, 1000, "C:/home/Research/EMIF WP12/ARS CDM loading/ScanReport.xlsx", "c:/temp"); - } - public void generateData(DbSettings dbSettings, int maxRowsPerTable, String filename, String folder) { this.maxRowsPerTable = maxRowsPerTable; - // this.dbType = dbSettings.dbType; this.targetType = dbSettings.dataType; StringUtilities.outputWithTime("Starting creation of fake data"); @@ -101,6 +81,11 @@ private List generateRows(Table table) { int size = maxRowsPerTable; for (int i = 0; i < table.getFields().size(); i++) { Field field = table.getFields().get(i); + // If a field in the table is empty, the whole table is empty. + // Return empty list (writes empty file) + if (field.getType().equals("empty")) { + return new ArrayList<>(); + } fieldNames[i] = field.getName(); ValueGenerator valueGenerator = new ValueGenerator(field); valueGenerators[i] = valueGenerator;