diff --git a/marytts-builder/src/main/java/marytts/tools/voiceimport/AllophonesExtractor.java b/marytts-builder/src/main/java/marytts/tools/voiceimport/AllophonesExtractor.java index f64483f2dd..75e9669a0c 100644 --- a/marytts-builder/src/main/java/marytts/tools/voiceimport/AllophonesExtractor.java +++ b/marytts-builder/src/main/java/marytts/tools/voiceimport/AllophonesExtractor.java @@ -177,7 +177,13 @@ public void generateAllophonesFile(String basename) File textFile = new File(fullFileName); String text = FileUtils.readFileToString(textFile, "UTF-8"); - // First, test if there is a corresponding .rawmaryxml file in textdir: + // First, test if there is a corresponding .lexicon file in lexicon dir: + String lexiconfilename=db.getProp(db.LEXICONDIR) + File.separator + basename + db.getProp(db.LEXICONEXT); + File lexiconFile = new File(lexiconfilename); + if (!lexiconFile.exists()) { + lexiconfilename = null; + } + // Then, test if there is a corresponding .rawmaryxml file in maryxmlDir: File rawmaryxmlFile = new File(db.getProp(db.MARYXMLDIR) + File.separator + basename + db.getProp(db.MARYXMLEXT)); if (rawmaryxmlFile.exists()) { if (style.isEmpty()) { @@ -212,9 +218,14 @@ public void generateAllophonesFile(String basename) prosodyOpeningTag = String.format("<%s style=\"%s\">\n", MaryXML.PROSODY, style); prosodyClosingTag = String.format("\n", MaryXML.PROSODY); } - text = getMaryXMLHeaderWithInitialBoundary(xmlLocale) + prosodyOpeningTag + text + prosodyClosingTag + ""; + text = getMaryXMLHeaderWithInitialBoundary(xmlLocale, lexiconfilename) + prosodyOpeningTag + text + prosodyClosingTag + ""; } +// System.out.println("----------------------"); +// System.out.println(text); +// System.out.println("----------------------"); +// + OutputStream os = new BufferedOutputStream(new FileOutputStream(new File(outputDir, basename + featsExt))); MaryHttpClient maryClient = getMaryClient(); maryClient.process(text, maryInputType, maryOutputType, db.getProp(db.LOCALE), null, null, os); @@ -222,12 +233,17 @@ public void generateAllophonesFile(String basename) os.close(); } - public static String getMaryXMLHeaderWithInitialBoundary(String locale) // wtf? + public static String getMaryXMLHeaderWithInitialBoundary(String locale, String lexiconfilename) // wtf? { + if (lexiconfilename !=null) return "\n" + "\n" + "\n"; - + + "lexicon=\"" + lexiconfilename + "\"\n" + "xml:lang=\"" + locale + "\">\n" + "\n"; + + else + return "\n" + "\n" + "\n"; } /** diff --git a/marytts-builder/src/main/java/marytts/tools/voiceimport/DatabaseLayout.java b/marytts-builder/src/main/java/marytts/tools/voiceimport/DatabaseLayout.java index 7db47943e2..bf3a37a032 100644 --- a/marytts-builder/src/main/java/marytts/tools/voiceimport/DatabaseLayout.java +++ b/marytts-builder/src/main/java/marytts/tools/voiceimport/DatabaseLayout.java @@ -107,6 +107,10 @@ public class DatabaseLayout public static final String PTCEXT = "db.ptcExtension"; //directory for temporary files public static final String TEMPDIR = "db.tempDir"; + //lexicon dir + public static final String LEXICONDIR = "db.lexiconDir"; + //lexicon extension + public static final String LEXICONEXT = "db.lexiconExtension"; //maryxml dir public static final String MARYXMLDIR = "db.maryxmlDir"; //maryxml extentsion @@ -203,6 +207,8 @@ private void setupHelp() props2Help.put(LOCALE,"de, en or en_US"); props2Help.put(MARYBASE,"directory containing the local Mary installation"); props2Help.put(MARYBASEVERSION,"local Mary installation version"); + props2Help.put(LEXICONDIR,"directory containing lexicon representations of the transcripts. Will be created if it does not exist."); + props2Help.put(LEXICONEXT,"extension of the lexicon files, default: \".lexicon\""); props2Help.put(MARYXMLDIR,"directory containing maryxml representations of the transcripts. Will be created if it does not exist."); props2Help.put(MARYXMLEXT,"extension of the maryxml files, default: \".xml\""); props2Help.put(ROOTDIR,"directory in which all the files created during installation will be stored. Will be created if it does not exist."); @@ -692,6 +698,8 @@ private SortedMap initDefaultProps(SortedMap someP someProps.put(MARYEXT, ".mry"); someProps.put(BASENAMEFILE, rootDir+"basenames.lst"); someProps.put(TEMPDIR, rootDir+"temp"+fileSeparator); + someProps.put(LEXICONDIR, rootDir+"lexicon"+fileSeparator); + someProps.put(LEXICONEXT, ".lexicon"); someProps.put(MARYXMLDIR, rootDir+"rawmaryxml"+fileSeparator); someProps.put(MARYXMLEXT, ".xml"); someProps.put(PROMPTALLOPHONESDIR, rootDir+"prompt_allophones"+fileSeparator); @@ -738,6 +746,10 @@ private void assureFileIntegrity() checkDir(CONFIGDIR); /* check temp dir */ checkDir(TEMPDIR); + + /* check lexicon dir */ + checkDir(LEXICONDIR); + /* check maryxml dir */ checkDir(MARYXMLDIR); diff --git a/marytts-builder/src/test/resources/marytts/tools/voiceimport/database.config b/marytts-builder/src/test/resources/marytts/tools/voiceimport/database.config index a3a3263aef..3b259685d2 100644 --- a/marytts-builder/src/test/resources/marytts/tools/voiceimport/database.config +++ b/marytts-builder/src/test/resources/marytts/tools/voiceimport/database.config @@ -7,6 +7,8 @@ db.halfphoneFeatureDir /home/test/my_voice/halfphonefeatures/ db.halfphoneLabDir /home/test/my_voice/halfphonelab/ db.labDir /home/test/my_voice/lab/ db.labExtension .lab +db.lexiconDir /home/test/my_voice/lexicon/ +db.lexiconExtension .lexicon db.estDir /project/mary/Festival/speech_tools/ db.hplabExtension .hplab db.locale en_US diff --git a/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd b/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd index cd5d3b887c..e8704eea1f 100644 --- a/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd +++ b/marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd @@ -240,6 +240,7 @@ THIS SOFTWARE. + @@ -305,6 +306,7 @@ THIS SOFTWARE. + @@ -527,7 +529,7 @@ THIS SOFTWARE. - + @@ -543,7 +545,7 @@ THIS SOFTWARE. - + @@ -649,9 +651,10 @@ THIS SOFTWARE. + - + @@ -659,7 +662,7 @@ THIS SOFTWARE. - + @@ -676,6 +679,8 @@ THIS SOFTWARE. + + @@ -683,7 +688,7 @@ THIS SOFTWARE. - + diff --git a/marytts-runtime/src/main/java/marytts/util/dom/MaryDomUtils.java b/marytts-runtime/src/main/java/marytts/util/dom/MaryDomUtils.java index 2c8faa0a18..8929c86023 100644 --- a/marytts-runtime/src/main/java/marytts/util/dom/MaryDomUtils.java +++ b/marytts-runtime/src/main/java/marytts/util/dom/MaryDomUtils.java @@ -23,11 +23,19 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; import java.util.Locale; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; import marytts.datatypes.MaryXML; import marytts.exceptions.MaryConfigurationException; @@ -203,6 +211,23 @@ public static boolean isSchemaValid(Document doc) } return true; } + + public static void printDocument(Document doc, OutputStream out) throws IOException, TransformerException { + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer transformer = tf.newTransformer(); + transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); + transformer.setOutputProperty(OutputKeys.METHOD, "xml"); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); + + transformer.transform(new DOMSource(doc), + (Result) new StreamResult(new OutputStreamWriter(out, "UTF-8"))); + } + + + + }