001 package org.maltparser.parser; 002 003 import java.io.File; 004 import java.io.IOException; 005 import java.util.Formatter; 006 import java.util.regex.Pattern; 007 008 import org.apache.log4j.FileAppender; 009 import org.apache.log4j.Level; 010 import org.apache.log4j.Logger; 011 import org.apache.log4j.PatternLayout; 012 import org.maltparser.core.config.ConfigurationDir; 013 import org.maltparser.core.config.ConfigurationException; 014 import org.maltparser.core.config.ConfigurationRegistry; 015 import org.maltparser.core.exception.MaltChainedException; 016 import org.maltparser.core.helper.SystemLogger; 017 import org.maltparser.core.helper.Util; 018 import org.maltparser.core.io.dataformat.DataFormatInstance; 019 import org.maltparser.core.options.OptionManager; 020 import org.maltparser.core.propagation.PropagationManager; 021 import org.maltparser.core.symbol.SymbolTableHandler; 022 import org.maltparser.core.syntaxgraph.DependencyStructure; 023 import org.maltparser.parser.guide.ClassifierGuide; 024 025 /** 026 * @author Johan Hall 027 * 028 */ 029 public class SingleMalt implements DependencyParserConfig { 030 public static final int LEARN = 0; 031 public static final int PARSE = 1; 032 protected ConfigurationDir configDir; 033 protected Logger configLogger; 034 protected int optionContainerIndex; 035 protected Algorithm parsingAlgorithm = null; 036 protected int mode; 037 protected ConfigurationRegistry registry; 038 protected SymbolTableHandler symbolTableHandler; 039 protected long startTime; 040 protected long endTime; 041 protected int nIterations = 0; 042 protected PropagationManager propagationManager; 043 044 public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException { 045 046 this.optionContainerIndex = containerIndex; 047 this.mode = mode; 048 setConfigurationDir(configDir); 049 startTime = System.currentTimeMillis(); 050 configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString()); 051 registry = new ConfigurationRegistry(); 052 symbolTableHandler = dataFormatInstance.getSymbolTables(); 053 054 if (mode == SingleMalt.LEARN) { 055 checkOptionDependency(); 056 } 057 registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables()); 058 registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance); 059 // registry.put(org.maltparser.parser.DependencyParserConfig.class, this); 060 initPropagation(); 061 initParsingAlgorithm(); 062 063 } 064 065 private void initPropagation() throws MaltChainedException { 066 String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString(); 067 if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) { 068 return; 069 } 070 propagationManager = new PropagationManager(configDir, symbolTableHandler); 071 if (mode == SingleMalt.LEARN) { 072 propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName); 073 OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName); 074 } 075 getConfigLogger().info(" Propagation : " + propagationSpecFileName+"\n"); 076 propagationManager.loadSpecification(propagationSpecFileName); 077 } 078 079 /** 080 * Initialize the parsing algorithm 081 * 082 * @throws MaltChainedException 083 */ 084 protected void initParsingAlgorithm() throws MaltChainedException { 085 if (mode == LEARN) { 086 parsingAlgorithm = new BatchTrainer(this); 087 } else if (mode == PARSE) { 088 parsingAlgorithm = new DeterministicParser(this); 089 } 090 } 091 092 public void addRegistry(Class<?> clazz, Object o) { 093 registry.put(clazz, o); 094 } 095 096 public void process(Object[] arguments) throws MaltChainedException { 097 if (mode == LEARN) { 098 if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) { 099 throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. "); 100 } 101 DependencyStructure systemGraph = (DependencyStructure)arguments[0]; 102 DependencyStructure goldGraph = (DependencyStructure)arguments[1]; 103 if (systemGraph.hasTokens() && getGuide() != null) { 104 getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph)); 105 } 106 } else if (mode == PARSE) { 107 if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) { 108 throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. "); 109 } 110 DependencyStructure processGraph = (DependencyStructure)arguments[0]; 111 if (processGraph.hasTokens()) { 112 ((Parser)getAlgorithm()).parse(processGraph); 113 } 114 } 115 } 116 117 public void parse(DependencyStructure graph) throws MaltChainedException { 118 if (graph.hasTokens()) { 119 ((Parser)getAlgorithm()).parse(graph); 120 } 121 } 122 123 public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException { 124 if (oracleGraph.hasTokens()) { 125 if (getGuide() != null) { 126 getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph)); 127 } else { 128 ((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph); 129 } 130 } 131 } 132 133 public void train() throws MaltChainedException { 134 if (getGuide() == null) { 135 ((Trainer)getAlgorithm()).train(); 136 } 137 } 138 139 public void terminate(Object[] arguments) throws MaltChainedException { 140 // if (getAlgorithm() instanceof Trainer) { 141 // ((Trainer)getAlgorithm()).terminate(); 142 // } 143 getAlgorithm().terminate(); 144 if (getGuide() != null) { 145 getGuide().terminate(); 146 } 147 if (mode == LEARN) { 148 endTime = System.currentTimeMillis(); 149 long elapsed = endTime - startTime; 150 if (configLogger.isInfoEnabled()) { 151 configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n"); 152 } 153 } else if (mode == PARSE) { 154 endTime = System.currentTimeMillis(); 155 long elapsed = endTime - startTime; 156 if (configLogger.isInfoEnabled()) { 157 configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n"); 158 } 159 } 160 if (SystemLogger.logger() != configLogger && configLogger != null) { 161 configLogger.removeAllAppenders(); 162 } 163 } 164 165 /** 166 * Initialize the configuration logger 167 * 168 * @return the configuration logger 169 * @throws MaltChainedException 170 */ 171 public Logger initConfigLogger(String logfile, String level) throws MaltChainedException { 172 if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) { 173 configLogger = Logger.getLogger(logfile); 174 FileAppender fileAppender = null; 175 try { 176 fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true); 177 } catch(IOException e) { 178 throw new ConfigurationException("It is not possible to create a configuration log file. ", e); 179 } 180 fileAppender.setThreshold(Level.toLevel(level, Level.INFO)); 181 configLogger.addAppender(fileAppender); 182 configLogger.setLevel(Level.toLevel(level, Level.INFO)); 183 } else { 184 configLogger = SystemLogger.logger(); 185 } 186 187 return configLogger; 188 } 189 190 public Logger getConfigLogger() { 191 return configLogger; 192 } 193 194 public void setConfigLogger(Logger logger) { 195 configLogger = logger; 196 } 197 198 public ConfigurationDir getConfigurationDir() { 199 return configDir; 200 } 201 202 public void setConfigurationDir(ConfigurationDir configDir) { 203 this.configDir = configDir; 204 } 205 206 public int getMode() { 207 return mode; 208 } 209 210 public ConfigurationRegistry getRegistry() { 211 return registry; 212 } 213 214 public void setRegistry(ConfigurationRegistry registry) { 215 this.registry = registry; 216 } 217 218 public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException { 219 return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname); 220 } 221 222 public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException { 223 return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname); 224 } 225 226 public OptionManager getOptionManager() throws MaltChainedException { 227 return OptionManager.instance(); 228 } 229 /******************************** MaltParserConfiguration specific ********************************/ 230 231 /** 232 * Returns the list of symbol tables 233 * 234 * @return the list of symbol tables 235 */ 236 public SymbolTableHandler getSymbolTables() { 237 return symbolTableHandler; 238 } 239 240 public PropagationManager getPropagationManager() { 241 return propagationManager; 242 } 243 244 public Algorithm getAlgorithm() { 245 return parsingAlgorithm; 246 } 247 /** 248 * Returns the guide 249 * 250 * @return the guide 251 */ 252 public ClassifierGuide getGuide() { 253 return parsingAlgorithm.getGuide(); 254 } 255 256 public void checkOptionDependency() throws MaltChainedException { 257 try { 258 if (configDir.getInfoFileWriter() != null) { 259 configDir.getInfoFileWriter().write("\nDEPENDENCIES\n"); 260 } 261 262 // Copy the feature model file into the configuration directory 263 String featureModelFileName = getOptionValue("guide", "features").toString().trim(); 264 if (featureModelFileName.equals("")) { 265 // use default feature model depending on the selected parser algorithm 266 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm")); 267 featureModelFileName = getOptionValue("guide", "features").toString().trim(); 268 featureModelFileName = featureModelFileName.replace("{learner}", getOptionValueString("guide", "learner")); 269 featureModelFileName = configDir.copyToConfig(Util.findURLinJars(featureModelFileName)); 270 } else { 271 featureModelFileName = configDir.copyToConfig(featureModelFileName); 272 } 273 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName); 274 if (configDir.getInfoFileWriter() != null) { 275 configDir.getInfoFileWriter().write("--guide-features ( -F) "+getOptionValue("guide", "features").toString()+"\n"); 276 } 277 278 if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) { 279 configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n "); 280 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", ""); 281 if (configDir.getInfoFileWriter() != null) { 282 configDir.getInfoFileWriter().write("--guide-data_split_structure ( -s)\n"); 283 } 284 } 285 if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) { 286 configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n"); 287 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", ""); 288 if (configDir.getInfoFileWriter() != null) { 289 configDir.getInfoFileWriter().write("--guide-data_split_column ( -d)\n"); 290 } 291 } 292 293 String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim(); 294 String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim(); 295 String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim(); 296 StringBuilder newDecisionSettings = new StringBuilder(); 297 // if ((Boolean)getOptionValue("malt0.4", "behavior") == true) { 298 // decisionSettings = "T.TRANS+A.DEPREL"; 299 // } 300 if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) { 301 decisionSettings = "T.TRANS+A.DEPREL"; 302 } else { 303 decisionSettings = decisionSettings.toUpperCase(); 304 } 305 306 if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) { 307 if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) { 308 newDecisionSettings.append("+A.PPLIFTED"); 309 } 310 } 311 if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) { 312 if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) { 313 newDecisionSettings.append("+A.PPPATH"); 314 } 315 } 316 if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) { 317 newDecisionSettings.append("+A.PPCOVERED"); 318 } 319 if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) { 320 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString()); 321 if (configDir.getInfoFileWriter() != null) { 322 configDir.getInfoFileWriter().write("--guide-decision_settings ( -gds) "+getOptionValue("guide", "decision_settings").toString()+"\n"); 323 } 324 } 325 if (configDir.getInfoFileWriter() != null) { 326 configDir.getInfoFileWriter().flush(); 327 } 328 } catch (IOException e) { 329 throw new ConfigurationException("Could not write to the configuration information file. ", e); 330 } 331 } 332 }