001 package org.maltparser.core.io.dataformat; 002 003 import java.util.Iterator; 004 import java.util.SortedMap; 005 import java.util.SortedSet; 006 import java.util.TreeMap; 007 import java.util.TreeSet; 008 009 import org.maltparser.core.exception.MaltChainedException; 010 import org.maltparser.core.symbol.SymbolTable; 011 import org.maltparser.core.symbol.SymbolTableHandler; 012 013 /** 014 * 015 * 016 * @author Johan Hall 017 * @since 1.0 018 **/ 019 public class DataFormatInstance implements Iterable<ColumnDescription> { 020 private final SortedSet<ColumnDescription> columnDescriptions; 021 private SortedMap<String,ColumnDescription> headColumnDescriptions; 022 private SortedMap<String,ColumnDescription> dependencyEdgeLabelColumnDescriptions; 023 private SortedMap<String,ColumnDescription> phraseStructureEdgeLabelColumnDescriptions; 024 private SortedMap<String,ColumnDescription> phraseStructureNodeLabelColumnDescriptions; 025 private SortedMap<String,ColumnDescription> secondaryEdgeLabelColumnDescriptions; 026 private SortedMap<String,ColumnDescription> inputColumnDescriptions; 027 private SortedMap<String,ColumnDescription> ignoreColumnDescriptions; 028 029 private SortedSet<ColumnDescription> headColumnDescriptionSet; 030 private SortedSet<ColumnDescription> dependencyEdgeLabelColumnDescriptionSet; 031 private SortedSet<ColumnDescription> phraseStructureEdgeLabelColumnDescriptionSet; 032 private SortedSet<ColumnDescription> phraseStructureNodeLabelColumnDescriptionSet; 033 private SortedSet<ColumnDescription> secondaryEdgeLabelColumnDescriptionSet; 034 private SortedSet<ColumnDescription> inputColumnDescriptionSet; 035 private SortedSet<ColumnDescription> ignoreColumnDescriptionSet; 036 037 private SortedMap<String,SymbolTable> dependencyEdgeLabelSymbolTables; 038 private SortedMap<String,SymbolTable> phraseStructureEdgeLabelSymbolTables; 039 private SortedMap<String,SymbolTable> phraseStructureNodeLabelSymbolTables; 040 private SortedMap<String,SymbolTable> secondaryEdgeLabelSymbolTables; 041 private SortedMap<String,SymbolTable> inputSymbolTables; 042 043 044 private SymbolTableHandler symbolTables; 045 private DataFormatSpecification dataFormarSpec; 046 047 public DataFormatInstance(SortedMap<String, DataFormatEntry> entries, SymbolTableHandler symbolTables, String nullValueStrategy, String rootLabel, DataFormatSpecification spec) throws MaltChainedException { 048 columnDescriptions = new TreeSet<ColumnDescription>(); 049 this.symbolTables = symbolTables; 050 createColumnDescriptions(entries, nullValueStrategy, rootLabel); 051 setDataFormarSpec(spec); 052 } 053 054 private void createColumnDescriptions(SortedMap<String, DataFormatEntry> entries, String nullValueStrategy, String rootLabel) throws MaltChainedException { 055 for (DataFormatEntry entry : entries.values()) { 056 columnDescriptions.add(new ColumnDescription(entry.getPosition(), entry.getDataFormatEntryName(), entry.getCategory(), entry.getType(), entry.getDefaultOutput(), symbolTables, nullValueStrategy, rootLabel)); 057 } 058 } 059 060 public ColumnDescription getColumnDescriptionByName(String name) { 061 for (ColumnDescription column : columnDescriptions) { 062 if (column.getName().equals(name)) { 063 return column; 064 } 065 } 066 return null; 067 } 068 069 public int getNumberOfColumnDescriptions() { 070 return columnDescriptions.size(); 071 } 072 073 public Iterator<ColumnDescription> iterator() { 074 return columnDescriptions.iterator(); 075 } 076 077 public DataFormatSpecification getDataFormarSpec() { 078 return dataFormarSpec; 079 } 080 081 private void setDataFormarSpec(DataFormatSpecification dataFormarSpec) { 082 this.dataFormarSpec = dataFormarSpec; 083 } 084 085 protected void createHeadColumnDescriptions() { 086 headColumnDescriptions = new TreeMap<String,ColumnDescription>(); 087 for (ColumnDescription column : columnDescriptions) { 088 if (column.getCategory() == ColumnDescription.HEAD && column.getType() != ColumnDescription.IGNORE) { 089 headColumnDescriptions.put(column.getName(), column); 090 } 091 } 092 } 093 094 public ColumnDescription getHeadColumnDescription() { 095 if (headColumnDescriptions == null) { 096 createHeadColumnDescriptions(); 097 } 098 return headColumnDescriptions.get(headColumnDescriptions.firstKey()); 099 } 100 101 public SortedMap<String,ColumnDescription> getHeadColumnDescriptions() { 102 if (headColumnDescriptions == null) { 103 createHeadColumnDescriptions(); 104 } 105 return headColumnDescriptions; 106 } 107 108 protected void createDependencyEdgeLabelSymbolTables() { 109 dependencyEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 110 for (ColumnDescription column : columnDescriptions) { 111 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 112 dependencyEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 113 } 114 } 115 } 116 117 public SortedMap<String,SymbolTable> getDependencyEdgeLabelSymbolTables() { 118 if (dependencyEdgeLabelSymbolTables == null) { 119 createDependencyEdgeLabelSymbolTables(); 120 } 121 return dependencyEdgeLabelSymbolTables; 122 } 123 124 protected void createDependencyEdgeLabelColumnDescriptions() { 125 dependencyEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 126 for (ColumnDescription column : columnDescriptions) { 127 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 128 dependencyEdgeLabelColumnDescriptions.put(column.getName(), column); 129 } 130 } 131 } 132 133 public SortedMap<String,ColumnDescription> getDependencyEdgeLabelColumnDescriptions() { 134 if (dependencyEdgeLabelColumnDescriptions == null) { 135 createDependencyEdgeLabelColumnDescriptions(); 136 } 137 return dependencyEdgeLabelColumnDescriptions; 138 } 139 140 141 142 protected void createPhraseStructureEdgeLabelSymbolTables() { 143 phraseStructureEdgeLabelSymbolTables = new TreeMap<String, SymbolTable>(); 144 for (ColumnDescription column : columnDescriptions) { 145 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 146 phraseStructureEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 147 } 148 } 149 } 150 151 public SortedMap<String,SymbolTable> getPhraseStructureEdgeLabelSymbolTables() { 152 if (phraseStructureEdgeLabelSymbolTables == null) { 153 createPhraseStructureEdgeLabelSymbolTables(); 154 } 155 return phraseStructureEdgeLabelSymbolTables; 156 } 157 158 protected void createPhraseStructureEdgeLabelColumnDescriptions() { 159 phraseStructureEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 160 for (ColumnDescription column : columnDescriptions) { 161 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 162 phraseStructureEdgeLabelColumnDescriptions.put(column.getName(), column); 163 } 164 } 165 } 166 167 public SortedMap<String,ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptions() { 168 if (phraseStructureEdgeLabelColumnDescriptions == null) { 169 createPhraseStructureEdgeLabelColumnDescriptions(); 170 } 171 return phraseStructureEdgeLabelColumnDescriptions; 172 } 173 174 protected void createPhraseStructureNodeLabelSymbolTables() { 175 phraseStructureNodeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 176 for (ColumnDescription column : columnDescriptions) { 177 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL && column.getType() != ColumnDescription.IGNORE) { 178 phraseStructureNodeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 179 } 180 } 181 } 182 183 public SortedMap<String,SymbolTable> getPhraseStructureNodeLabelSymbolTables() { 184 if (phraseStructureNodeLabelSymbolTables == null) { 185 createPhraseStructureNodeLabelSymbolTables(); 186 } 187 return phraseStructureNodeLabelSymbolTables; 188 } 189 190 protected void createPhraseStructureNodeLabelColumnDescriptions() { 191 phraseStructureNodeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 192 for (ColumnDescription column : columnDescriptions) { 193 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL && column.getType() != ColumnDescription.IGNORE) { 194 phraseStructureNodeLabelColumnDescriptions.put(column.getName(), column); 195 } 196 } 197 } 198 199 public SortedMap<String,ColumnDescription> getPhraseStructureNodeLabelColumnDescriptions() { 200 if (phraseStructureNodeLabelColumnDescriptions == null) { 201 createPhraseStructureNodeLabelColumnDescriptions(); 202 } 203 return phraseStructureNodeLabelColumnDescriptions; 204 } 205 206 protected void createSecondaryEdgeLabelSymbolTables() { 207 secondaryEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 208 for (ColumnDescription column : columnDescriptions) { 209 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 210 secondaryEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 211 } 212 } 213 } 214 215 public SortedMap<String,SymbolTable> getSecondaryEdgeLabelSymbolTables() { 216 if (secondaryEdgeLabelSymbolTables == null) { 217 createSecondaryEdgeLabelSymbolTables(); 218 } 219 return secondaryEdgeLabelSymbolTables; 220 } 221 222 protected void createSecondaryEdgeLabelColumnDescriptions() { 223 secondaryEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 224 for (ColumnDescription column : columnDescriptions) { 225 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 226 secondaryEdgeLabelColumnDescriptions.put(column.getName(), column); 227 } 228 } 229 } 230 231 public SortedMap<String,ColumnDescription> getSecondaryEdgeLabelColumnDescriptions() { 232 if (secondaryEdgeLabelColumnDescriptions == null) { 233 createSecondaryEdgeLabelColumnDescriptions(); 234 } 235 return secondaryEdgeLabelColumnDescriptions; 236 } 237 238 protected void createInputSymbolTables() { 239 inputSymbolTables = new TreeMap<String,SymbolTable>(); 240 for (ColumnDescription column : columnDescriptions) { 241 if (column.getCategory() == ColumnDescription.INPUT && column.getType() != ColumnDescription.IGNORE) { 242 inputSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 243 } 244 } 245 } 246 247 public SortedMap<String,SymbolTable> getInputSymbolTables() { 248 if (inputSymbolTables == null) { 249 createInputSymbolTables(); 250 } 251 return inputSymbolTables; 252 } 253 254 protected void createInputColumnDescriptions() { 255 inputColumnDescriptions = new TreeMap<String,ColumnDescription>(); 256 for (ColumnDescription column : columnDescriptions) { 257 if (column.getCategory() == ColumnDescription.INPUT && column.getType() != ColumnDescription.IGNORE) { 258 inputColumnDescriptions.put(column.getName(), column); 259 } 260 } 261 } 262 263 public SortedMap<String,ColumnDescription> getInputColumnDescriptions() { 264 if (inputColumnDescriptions == null) { 265 createInputColumnDescriptions(); 266 } 267 return inputColumnDescriptions; 268 } 269 270 protected void createIgnoreColumnDescriptions() { 271 ignoreColumnDescriptions = new TreeMap<String,ColumnDescription>(); 272 for (ColumnDescription column : columnDescriptions) { 273 if (column.getType() == ColumnDescription.IGNORE) { 274 ignoreColumnDescriptions.put(column.getName(), column); 275 } 276 } 277 } 278 279 public SortedMap<String,ColumnDescription> getIgnoreColumnDescriptions() { 280 if (ignoreColumnDescriptions == null) { 281 createIgnoreColumnDescriptions(); 282 } 283 return ignoreColumnDescriptions; 284 } 285 286 public SortedSet<ColumnDescription> getHeadColumnDescriptionSet() { 287 if (headColumnDescriptionSet == null) { 288 headColumnDescriptionSet = new TreeSet<ColumnDescription>(); 289 for (ColumnDescription column : columnDescriptions) { 290 if (column.getCategory() == ColumnDescription.HEAD && column.getType() != ColumnDescription.IGNORE) { 291 headColumnDescriptionSet.add(column); 292 } 293 } 294 } 295 return headColumnDescriptionSet; 296 } 297 298 public SortedSet<ColumnDescription> getDependencyEdgeLabelColumnDescriptionSet() { 299 if (dependencyEdgeLabelColumnDescriptionSet == null) { 300 dependencyEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 301 for (ColumnDescription column : columnDescriptions) { 302 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 303 dependencyEdgeLabelColumnDescriptionSet.add(column); 304 } 305 } 306 } 307 return dependencyEdgeLabelColumnDescriptionSet; 308 } 309 310 public SortedSet<ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptionSet() { 311 if (phraseStructureEdgeLabelColumnDescriptionSet == null) { 312 phraseStructureEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 313 for (ColumnDescription column : columnDescriptions) { 314 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 315 phraseStructureEdgeLabelColumnDescriptionSet.add(column); 316 } 317 } 318 } 319 return phraseStructureEdgeLabelColumnDescriptionSet; 320 } 321 322 public SortedSet<ColumnDescription> getPhraseStructureNodeLabelColumnDescriptionSet() { 323 if (phraseStructureNodeLabelColumnDescriptionSet == null) { 324 phraseStructureNodeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 325 for (ColumnDescription column : columnDescriptions) { 326 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL && column.getType() != ColumnDescription.IGNORE) { 327 phraseStructureNodeLabelColumnDescriptionSet.add(column); 328 } 329 } 330 } 331 return phraseStructureNodeLabelColumnDescriptionSet; 332 } 333 334 public SortedSet<ColumnDescription> getSecondaryEdgeLabelColumnDescriptionSet() { 335 if (secondaryEdgeLabelColumnDescriptionSet == null) { 336 secondaryEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 337 for (ColumnDescription column : columnDescriptions) { 338 if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE) { 339 secondaryEdgeLabelColumnDescriptionSet.add(column); 340 } 341 } 342 } 343 return secondaryEdgeLabelColumnDescriptionSet; 344 } 345 346 public SortedSet<ColumnDescription> getInputColumnDescriptionSet() { 347 if (inputColumnDescriptionSet == null) { 348 inputColumnDescriptionSet = new TreeSet<ColumnDescription>(); 349 for (ColumnDescription column : columnDescriptions) { 350 if (column.getCategory() == ColumnDescription.INPUT && column.getType() != ColumnDescription.IGNORE) { 351 inputColumnDescriptionSet.add(column); 352 } 353 } 354 } 355 return inputColumnDescriptionSet; 356 } 357 358 public SortedSet<ColumnDescription> getIgnoreColumnDescriptionSet() { 359 if (ignoreColumnDescriptionSet == null) { 360 ignoreColumnDescriptionSet = new TreeSet<ColumnDescription>(); 361 for (ColumnDescription column : columnDescriptions) { 362 if (column.getType() == ColumnDescription.IGNORE) { 363 ignoreColumnDescriptionSet.add(column); 364 } 365 } 366 } 367 return ignoreColumnDescriptionSet; 368 } 369 370 public SymbolTableHandler getSymbolTables() { 371 return symbolTables; 372 } 373 374 public String toString() { 375 final StringBuilder sb = new StringBuilder(); 376 for (ColumnDescription column : columnDescriptions) { 377 sb.append(column); 378 sb.append('\n'); 379 } 380 return sb.toString(); 381 } 382 }