package org.lindenb.sandbox;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Vector;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;

/**
 * Test for StAX: reads NCBI TinySeq XML into memory and writes it back.
 *
 * @author Pierre Lindenbaum PhD
 */
public class STAXTinySeq {
    /**
     * A Tiny Seq record. Each field holds the content of the like-named
     * {@code TSeq_*} element ({@code type} holds the {@code value} attribute of
     * {@code TSeq_seqtype}). String fields stay {@code null} and int fields stay
     * {@code 0} when the corresponding element was not present in the input.
     */
    static private class TSeq {
        String type;
        int gi;
        String accver;
        String sid;
        int taxid;
        String orgname;
        String defline;
        int length;
        String sequence;
    }

    /** All records read so far, in document order. */
    private Vector<TSeq> sequences = new Vector<TSeq>();

    /**
     * Parses an XML stream of NCBI TinySeq and appends its records to the
     * in-memory list. Elements whose name is not recognised are ignored.
     *
     * @param in input; it is not closed by this method
     * @throws XMLStreamException if the XML cannot be parsed, if a
     *         {@code TSeq_*} element appears outside of a {@code TSeq} element,
     *         if {@code TSeq_seqtype} has no {@code value} attribute, or if a
     *         numeric element does not contain an integer
     */
    public void read(InputStream in) throws XMLStreamException {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        // The parser only needs element names and text, so DTD processing and
        // external entities are switched off (XXE hardening).
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        XMLEventReader parser = factory.createXMLEventReader(in);
        try {
            TSeq seq = null;
            while (parser.hasNext()) {
                XMLEvent event = parser.nextEvent();
                if (event.isEndElement()) {
                    // Leaving a record: later TSeq_* elements must not be
                    // attached to it.
                    if ("TSeq".equals(event.asEndElement().getName().getLocalPart())) {
                        seq = null;
                    }
                    continue;
                }
                if (!event.isStartElement()) {
                    continue;
                }
                StartElement start = event.asStartElement();
                String localName = start.getName().getLocalPart();
                if (localName.equals("TSeq")) {
                    seq = new TSeq();
                    this.sequences.addElement(seq);
                } else if (localName.startsWith("TSeq_")) {
                    if (seq == null) {
                        throw new XMLStreamException(
                                "<" + localName + "> found outside of a <TSeq> element",
                                start.getLocation());
                    }
                    readField(seq, start, localName, parser);
                }
            }
        } finally {
            parser.close();
        }
    }

    /** Stores the content of one {@code TSeq_*} child element into {@code seq}. */
    private static void readField(TSeq seq, StartElement start, String localName,
            XMLEventReader parser) throws XMLStreamException {
        if (localName.equals("TSeq_seqtype")) {
            Attribute value = start.getAttributeByName(new QName("value"));
            if (value == null) {
                throw new XMLStreamException(
                        "<TSeq_seqtype> has no \"value\" attribute", start.getLocation());
            }
            seq.type = value.getValue();
        } else if (localName.equals("TSeq_gi")) {
            seq.gi = readInt(parser, start, localName);
        } else if (localName.equals("TSeq_accver")) {
            seq.accver = parser.getElementText();
        } else if (localName.equals("TSeq_sid")) {
            seq.sid = parser.getElementText();
        } else if (localName.equals("TSeq_taxid")) {
            seq.taxid = readInt(parser, start, localName);
        } else if (localName.equals("TSeq_orgname")) {
            seq.orgname = parser.getElementText();
        } else if (localName.equals("TSeq_defline")) {
            seq.defline = parser.getElementText();
        } else if (localName.equals("TSeq_length")) {
            seq.length = readInt(parser, start, localName);
        } else if (localName.equals("TSeq_sequence")) {
            seq.sequence = parser.getElementText();
        }
    }

    /** Reads the text of the current element and parses it as an int, ignoring surrounding whitespace. */
    private static int readInt(XMLEventReader parser, StartElement start, String localName)
            throws XMLStreamException {
        String text = parser.getElementText().trim();
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            throw new XMLStreamException(
                    "<" + localName + "> is not an integer: \"" + text + "\"",
                    start.getLocation(), e);
        }
    }

    /**
     * Writes the records held in memory to {@code out} using StAX, as a
     * {@code TSeqSet} element containing one {@code TSeq} element per record.
     * String fields that are {@code null} are omitted; int fields are always
     * written.
     *
     * @param out output; it is flushed but not closed by this method
     * @throws XMLStreamException if the document cannot be written
     */
    public void write(OutputStream out) throws XMLStreamException {
        XMLOutputFactory factory = XMLOutputFactory.newInstance();
        XMLStreamWriter w = factory.createXMLStreamWriter(out);
        w.writeStartDocument();
        w.writeStartElement("TSeqSet");
        for (TSeq seq : this.sequences) {
            // Each record is a <TSeq>, which is the element name read() looks for.
            w.writeStartElement("TSeq");
            if (seq.type != null) {
                w.writeEmptyElement("TSeq_seqtype");
                w.writeAttribute("value", seq.type);
            }
            writeTextElement(w, "TSeq_gi", String.valueOf(seq.gi));
            writeTextElement(w, "TSeq_accver", seq.accver);
            writeTextElement(w, "TSeq_sid", seq.sid);
            writeTextElement(w, "TSeq_taxid", String.valueOf(seq.taxid));
            writeTextElement(w, "TSeq_orgname", seq.orgname);
            writeTextElement(w, "TSeq_defline", seq.defline);
            writeTextElement(w, "TSeq_length", String.valueOf(seq.length));
            writeTextElement(w, "TSeq_sequence", seq.sequence);
            w.writeEndElement();
        }
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
    }

    /** Writes {@code <name>text</name>}, or nothing at all when {@code text} is null. */
    private static void writeTextElement(XMLStreamWriter w, String name, String text)
            throws XMLStreamException {
        if (text == null) {
            return;
        }
        w.writeStartElement(name);
        w.writeCharacters(text);
        w.writeEndElement();
    }

    /**
     * Reads some TinySeq XML files using StAX and echoes the merged content to
     * stdout. Any failure is reported as a stack trace on stderr.
     *
     * @param args paths of the XML files to read
     */
    public static void main(String[] args) {
        try {
            STAXTinySeq app = new STAXTinySeq();
            for (String f : args) {
                InputStream in = new FileInputStream(f);
                try {
                    app.read(in);
                } finally {
                    // Release the file handle even when parsing fails.
                    in.close();
                }
            }
            app.write(System.out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}