package org.lindenb.sandbox; import java.io.BufferedReader; import java.io.InputStreamReader; import javax.script.Compilable; import javax.script.CompiledScript; import javax.script.ScriptEngine; import javax.script.ScriptEngineFactory; import javax.script.ScriptEngineManager; import javax.script.ScriptException; import javax.script.SimpleBindings; /** * my first test for java 1.6 mustang/ ScriptEngine * @author Pierre Lindenbaum PhD * * example: cat *.fasta | java org.lindenb.sandox.FastaAWK "FastaAWK.gcPercent(sequence)<0.45 || (sequence.length>900) || (FastaAWK .reverseComplement(sequence).toUpperCase().indexOf('ATGCTTCTTG')!=-1 && name.indexOf('Xenopus')!=-1 );" * */ public class FastaAWK { /** header of fasta sequence */ private String name=null; /** dna sequence */ private StringBuilder sequence= new StringBuilder(); /** compiled user script */ private CompiledScript compiledScript=null; /** constructor * initialize the statements * @param statements */ public FastaAWK(String args[]) throws ScriptException { //copy statements to add the 'importClass' String statements[]= new String[args.length+1]; //import this class to get a handle on gcPercent and reverseComplement statements[0]="importClass(Packages."+this.getClass().getName()+");"; System.arraycopy(args, 0, statements, 1, args.length); //get a javascript engine ScriptEngineManager sem = new ScriptEngineManager(); ScriptEngine scriptEngine = sem.getEngineByName("js"); ScriptEngineFactory scriptEngineFactory= scriptEngine.getFactory(); String program = scriptEngineFactory.getProgram(statements); //compile this program this.compiledScript=((Compilable) scriptEngine).compile(program); } /** * * @param sequence the dna sequence * @return the GC% */ public static double gcPercent(String sequence) { int n=0; for(int i=0;i< sequence.length();++i) { char base= Character.toUpperCase(sequence.charAt(i)); n+=(base=='G' || base=='C'?1:0); } return (double)(n/(double)sequence.length()); } /** * return the reverse complement of a sequence * @param sequence the dna sequence * @return the reverse complement of a sequence */ public static String reverseComplement(String sequence) { StringBuilder b= new StringBuilder(sequence.length()); for(int i=sequence.length()-1;i>=0;--i) { switch(sequence.charAt(i)) { case 'A': b.append('T');break; case 'T': b.append('A');break; case 'G': b.append('C');break; case 'C': b.append('G');break; case 'a': b.append('t');break; case 't': b.append('a');break; case 'g': b.append('c');break; case 'c': b.append('g');break; default: b.append('N');break; } } return b.toString(); } /** * print the current fasta sequence if the compiled script return true * @throws ScriptException */ public void eval() throws ScriptException { /* bind name and sequence to javascript variable 'name' and 'sequence'*/ SimpleBindings bindings= new SimpleBindings(); bindings.put("name", this.name); bindings.put("sequence", this.sequence.toString()); //invoke the script with the current binding and get the result Object o=this.compiledScript.eval(bindings); if(o==null || !(o instanceof Boolean ) ) return; //if the result is true: print the fasta sequence Boolean b=(Boolean)o; if(b.equals(Boolean.FALSE)) { return; } System.out.print(name); for(int i=0;i< sequence.length();++i) { if(i%50==0) System.out.println(); System.out.print(sequence.charAt(i)); } System.out.println(); } /** * @param args */ public static void main(String[] args) { try { FastaAWK awk= new FastaAWK(args); //loop over fasta sequences BufferedReader in= new BufferedReader(new InputStreamReader(System.in)); String line=null; while((line=in.readLine())!=null) { if(line.startsWith(">")) { if(awk.sequence.length()>0) { awk.eval(); } awk.sequence.setLength(0); awk.name=line.trim(); } else { awk.sequence.append(line.trim()); } } if(awk.sequence.length()>0) { awk.eval(); } in.close(); } catch (Throwable e) { e.printStackTrace(); } } }