Skip to content

Commit

Permalink
Allow user lexicon definition at synthesis time. Issue marytts-it#5
Browse files Browse the repository at this point in the history
  • Loading branch information
ftesser authored and alize committed Mar 4, 2013
1 parent 9e86d83 commit d827e51
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,13 @@ public void generateAllophonesFile(String basename)
File textFile = new File(fullFileName);
String text = FileUtils.readFileToString(textFile, "UTF-8");

// First, test if there is a corresponding .rawmaryxml file in textdir:
// First, test if there is a corresponding .lexicon file in lexicon dir:
String lexiconfilename=db.getProp(db.LEXICONDIR) + File.separator + basename + db.getProp(db.LEXICONEXT);
File lexiconFile = new File(lexiconfilename);
if (!lexiconFile.exists()) {
lexiconfilename = null;
}
// Then, test if there is a corresponding .rawmaryxml file in maryxmlDir:
File rawmaryxmlFile = new File(db.getProp(db.MARYXMLDIR) + File.separator + basename + db.getProp(db.MARYXMLEXT));
if (rawmaryxmlFile.exists()) {
if (style.isEmpty()) {
Expand Down Expand Up @@ -212,22 +218,32 @@ public void generateAllophonesFile(String basename)
prosodyOpeningTag = String.format("<%s style=\"%s\">\n", MaryXML.PROSODY, style);
prosodyClosingTag = String.format("</%s>\n", MaryXML.PROSODY);
}
text = getMaryXMLHeaderWithInitialBoundary(xmlLocale) + prosodyOpeningTag + text + prosodyClosingTag + "</maryxml>";
text = getMaryXMLHeaderWithInitialBoundary(xmlLocale, lexiconfilename) + prosodyOpeningTag + text + prosodyClosingTag + "</maryxml>";
}

// System.out.println("----------------------");
// System.out.println(text);
// System.out.println("----------------------");
//

OutputStream os = new BufferedOutputStream(new FileOutputStream(new File(outputDir, basename + featsExt)));
MaryHttpClient maryClient = getMaryClient();
maryClient.process(text, maryInputType, maryOutputType, db.getProp(db.LOCALE), null, null, os);
os.flush();
os.close();
}

public static String getMaryXMLHeaderWithInitialBoundary(String locale) // wtf?
public static String getMaryXMLHeaderWithInitialBoundary(String locale, String lexiconfilename) // wtf?
{
if (lexiconfilename !=null)
return "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" + "<maryxml version=\"0.4\"\n"
+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" + "xmlns=\"http://mary.dfki.de/2002/MaryXML\"\n"
+ "xml:lang=\"" + locale + "\">\n" + "<boundary breakindex=\"2\" duration=\"100\"/>\n";

+ "lexicon=\"" + lexiconfilename + "\"\n" + "xml:lang=\"" + locale + "\">\n" + "<boundary breakindex=\"2\" duration=\"100\"/>\n";

else
return "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" + "<maryxml version=\"0.4\"\n"
+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" + "xmlns=\"http://mary.dfki.de/2002/MaryXML\"\n"
+ "xml:lang=\"" + locale + "\">\n" + "<boundary breakindex=\"2\" duration=\"100\"/>\n";
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ public class DatabaseLayout
public static final String PTCEXT = "db.ptcExtension";
//directory for temporary files
public static final String TEMPDIR = "db.tempDir";
//lexicon dir
public static final String LEXICONDIR = "db.lexiconDir";
//lexicon extension
public static final String LEXICONEXT = "db.lexiconExtension";
//maryxml dir
public static final String MARYXMLDIR = "db.maryxmlDir";
//maryxml extension
Expand Down Expand Up @@ -203,6 +207,8 @@ private void setupHelp()
props2Help.put(LOCALE,"de, en or en_US");
props2Help.put(MARYBASE,"directory containing the local Mary installation");
props2Help.put(MARYBASEVERSION,"local Mary installation version");
props2Help.put(LEXICONDIR,"directory containing lexicon representations of the transcripts. Will be created if it does not exist.");
props2Help.put(LEXICONEXT,"extension of the lexicon files, default: \".lexicon\"");
props2Help.put(MARYXMLDIR,"directory containing maryxml representations of the transcripts. Will be created if it does not exist.");
props2Help.put(MARYXMLEXT,"extension of the maryxml files, default: \".xml\"");
props2Help.put(ROOTDIR,"directory in which all the files created during installation will be stored. Will be created if it does not exist.");
Expand Down Expand Up @@ -692,6 +698,8 @@ private SortedMap<String,String> initDefaultProps(SortedMap<String,String> someP
someProps.put(MARYEXT, ".mry");
someProps.put(BASENAMEFILE, rootDir+"basenames.lst");
someProps.put(TEMPDIR, rootDir+"temp"+fileSeparator);
someProps.put(LEXICONDIR, rootDir+"lexicon"+fileSeparator);
someProps.put(LEXICONEXT, ".lexicon");
someProps.put(MARYXMLDIR, rootDir+"rawmaryxml"+fileSeparator);
someProps.put(MARYXMLEXT, ".xml");
someProps.put(PROMPTALLOPHONESDIR, rootDir+"prompt_allophones"+fileSeparator);
Expand Down Expand Up @@ -738,6 +746,10 @@ private void assureFileIntegrity()
checkDir(CONFIGDIR);
/* check temp dir */
checkDir(TEMPDIR);

/* check lexicon dir */
checkDir(LEXICONDIR);

/* check maryxml dir */
checkDir(MARYXMLDIR);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ db.halfphoneFeatureDir /home/test/my_voice/halfphonefeatures/
db.halfphoneLabDir /home/test/my_voice/halfphonelab/
db.labDir /home/test/my_voice/lab/
db.labExtension .lab
db.lexiconDir /home/test/my_voice/lexicon/
db.lexiconExtension .lexicon
db.estDir /project/mary/Festival/speech_tools/
db.hplabExtension .hplab
db.locale en_US
Expand Down
15 changes: 10 additions & 5 deletions marytts-common/src/main/resources/marytts/util/dom/MaryXML.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ THIS SOFTWARE.
<xsd:restriction base="xsd:string">
<xsd:enumeration value="first"/>
<xsd:enumeration value="last"/>
<xsd:enumeration value="last-proclitics"/>
</xsd:restriction>
</xsd:simpleType>
<xsd:simpleType name="gtobi_accent.type">
Expand Down Expand Up @@ -305,6 +306,7 @@ THIS SOFTWARE.
<xsd:enumeration value="lexicon"/>
<xsd:enumeration value="userdict"/>
<xsd:enumeration value="compound"/>
<xsd:enumeration value="privatedict"/>
<xsd:enumeration value="rules"/>
<xsd:enumeration value="foreign:en"/>
<xsd:enumeration value="phonemiseDenglish"/>
Expand Down Expand Up @@ -527,7 +529,7 @@ THIS SOFTWARE.
<xsd:simpleType>
<xsd:union memberTypes="xsd:string xsd:positiveInteger relative.change relative.change.st absolute.value.hz pitch.labels"/>
</xsd:simpleType>
</xsd:attribute>
</xsd:attribute>
<xsd:attribute name="contour" type="xsd:string"/>
<xsd:attribute name="pitch-dynamics">
<xsd:annotation>
Expand All @@ -543,7 +545,7 @@ THIS SOFTWARE.
</xsd:attribute>
<xsd:attribute name="range">
<xsd:simpleType>
<xsd:union memberTypes="relative.change relative.change.st absolute.value.st"/>
<xsd:union memberTypes="relative.change relative.change.st absolute.value.st"/>
</xsd:simpleType>
</xsd:attribute>
<xsd:attribute name="range-dynamics">
Expand Down Expand Up @@ -649,17 +651,18 @@ THIS SOFTWARE.
<xsd:group ref="allowed-within-phrase"/>
</xsd:choice>
<xsd:attribute name="orig" type="xsd:string"/>
<!-- TODO perhaps this is a better place to put compound lexicon info... -->
<xsd:attribute name="accent" type="accentposition.type"/>
</xsd:complexType>


<xsd:element name="vocalization" substitutionGroup="awp" type="vocalization"/>
<xsd:complexType mixed="true" name="vocalization">
<xsd:attribute name="name" type="xsd:string"/>
<xsd:attribute name="voicequality" type="xsd:string"/>
<xsd:attribute name="intonation" type="xsd:string"/>
<xsd:attribute name="meaning" type="xsd:string"/>
<xsd:attribute name="variant" type="xsd:string"/>
</xsd:complexType>
</xsd:complexType>

<xsd:element name="t" substitutionGroup="awp" type="token"/>
<xsd:complexType mixed="true" name="token">
Expand All @@ -676,14 +679,16 @@ THIS SOFTWARE.
<xsd:attribute name="syn_phrase" type="xsd:string"/>
<xsd:attribute name="given" type="givenness.type"/>
<xsd:attribute name="contrast" type="contrast.type"/>
<xsd:attribute name="merged-token" type="xsd:string"/> <!-- TO CHECK -->

</xsd:complexType>

<xsd:complexType name="syllable">
<xsd:sequence>
<!-- The phonemes composing the syllable -->
<xsd:element maxOccurs="unbounded" name="ph" type="ph"/>
</xsd:sequence>
<xsd:attribute name="ph" type="xsd:string"/>
<xsd:attribute name="ph" type="xsd:string"/>
<xsd:attribute name="tone" type="xsd:string"/>
<xsd:attribute default="0" name="stress">
<xsd:simpleType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ private void mtuMergeTokenPostlex(Document doc)
c = MaryDomUtils.getLastChildElement(c);
// merge ph and POS?
c.setAttribute("merged-token", "yes");
c.setAttribute("g2p_method", c1.getAttribute("g2p_method") + "+" + c.getAttribute("g2p_method"));
//c.setAttribute("g2p_method", "compound:" + c1.getAttribute("g2p_method") + "+" + c.getAttribute("g2p_method"));
c.setAttribute("g2p_method", "compound"); // + c1.getAttribute("g2p_method") + "+" + c.getAttribute("g2p_method"));
// TODO: accent= to merge? take the first or the second?
//c.setAttribute("accent", c1.getAttribute("accent")); // + "+" + c.getAttribute("accent"));
//c.removeAttribute("accent");
Expand Down
25 changes: 25 additions & 0 deletions marytts-runtime/src/main/java/marytts/util/dom/MaryDomUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,19 @@
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import marytts.datatypes.MaryXML;
import marytts.exceptions.MaryConfigurationException;
Expand Down Expand Up @@ -203,6 +211,23 @@ public static boolean isSchemaValid(Document doc)
}
return true;
}

/**
 * Serializes a DOM document as indented XML onto the given output stream.
 * The output includes the XML declaration, is encoded as UTF-8 and requests an
 * indentation of four spaces per level. The stream is flushed but not closed,
 * so the caller keeps ownership of it.
 *
 * @param doc the document to serialize
 * @param out the stream receiving the UTF-8 encoded XML
 * @throws IOException if the UTF-8 writer cannot be created or flushed
 * @throws TransformerException if the transformer cannot be created or the serialization fails
 */
public static void printDocument(Document doc, OutputStream out) throws IOException, TransformerException {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
    transformer.setOutputProperty(OutputKeys.METHOD, "xml");
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

    // The writer's charset matches the ENCODING property declared above.
    OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8");
    transformer.transform(new DOMSource(doc), new StreamResult(writer));
    // The writer buffers encoded bytes; flush explicitly so nothing is left behind
    // if the transformer implementation does not flush on its own.
    writer.flush();
}





}

0 comments on commit d827e51

Please sign in to comment.