package org.lindenb.sandbox; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Properties; /** * test for for javadb in java1.6 * reads fasta sequence and put then in derby/jdbc * @author Pierre Lindenbaum PhD * */ public class Derby4Fasta { /** sql connection */ private Connection connection=null; /** constructor */ public Derby4Fasta() { } /** * * @param sequence the dna sequence * @return the GC% */ public static double gcPercent(String sequence) { int n=0; for(int i=0;i< sequence.length();++i) { char base= Character.toUpperCase(sequence.charAt(i)); n+=(base=='G' || base=='C'?1:0); } return (double)(n/(double)sequence.length()); } /** open the connection , creates the database derby4fasta if not exists */ public void open() throws SQLException { File database=new File("derby4fasta"); String DRIVER="org.apache.derby.jdbc.EmbeddedDriver"; try { Class driver=Class.forName(DRIVER); boolean create=!database.exists(); driver.newInstance(); String url="jdbc:derby:"; Properties props= new Properties(); if(create) { props.setProperty("create", "true"); } props.setProperty("user", "dba"); props.setProperty("password", ""); props.setProperty("databaseName","derby4fasta"); this.connection = DriverManager.getConnection(url,props); this.connection.setAutoCommit(true); Statement stmt= this.connection.createStatement(); //create the datanbase if(create) { stmt.executeUpdate("create schema FASTA"); /* sadly custom wont't work using 'long varchar' * so I used a varchar(200) */ stmt.executeUpdate( "create table FASTA.SEQUENCE("+ "name varchar(500) not null, "+ "seq varchar(2000) not null"+ ")"); /* the function FASTA.GC call org.lindenb.sandbox.Derby4Fasta.gcPercent() */ stmt.executeUpdate( "create function FASTA.GC( seq VARCHAR(2000) ) returns DOUBLE "+ " LANGUAGE JAVA "+ " NO SQL "+ " PARAMETER STYLE JAVA "+ " EXTERNAL NAME \'org.lindenb.sandbox.Derby4Fasta.gcPercent\'" ); stmt.close(); } } catch(Throwable err) { close(); throw new SQLException(err); } } /** close the connection */ public void close() { try { if(this.connection!=null) this.connection.close(); //In an embedded system, the application shuts down the Derby system by issuing the following JDBC call //Shutdown commands always raise SQLExceptions. DriverManager.getConnection("jdbc:derby:derby4fasta;shutdown=true"); } catch (SQLException e) { //nothing } this.connection=null; } /** put the given sequence in the database, * may throw an exception if the sequence/name is too large*/ public void put(String name,String seq) throws IOException,SQLException { PreparedStatement pstmt= this.connection.prepareStatement( "insert into FASTA.SEQUENCE(name,seq) values (?,?)"); pstmt.setString(1, name); pstmt.setString(2, seq.trim()); pstmt.execute(); } /** read a fasta file, put the sequences in the database */ public void read(Reader reader) throws IOException,SQLException { StringBuilder sequence= new StringBuilder(); String name=null; BufferedReader in= new BufferedReader(reader); String line=null; while((line=in.readLine())!=null) { if(line.startsWith(">")) { if(sequence.length()>0) { put(name,sequence.toString()); } sequence.setLength(0); name=line.trim(); } else { sequence.append(line.trim()); } } if(sequence.length()>0) { put(name,sequence.toString()); } } /** * * @param args */ public static void main(String[] args) { try { int optind=0; Derby4Fasta app= new Derby4Fasta(); app.open(); if(optind< args.length) { //loop over fasta files while(optind< args.length) { FileReader in= new FileReader(args[optind++]); app.read(in); in.close(); } } else { //read from stdin app.read(new InputStreamReader(System.in)); } //find sequences having a GC% > 55% Statement stmt= app.connection.createStatement(); ResultSet row=stmt.executeQuery( "select FASTA.GC(seq)*100.0,name,length(seq) from FASTA.SEQUENCE " + "where FASTA.GC(seq)>0.55"); while(row.next()) { System.out.println(row.getInt(1)+"%\t"+row.getString(2)+"("+row.getInt(3)+")"); } //close the connection app.close(); return; } catch (Exception e) { e.printStackTrace(); } } }