001 /*
002 * RandomSequenceGenerator.java
003 *
004 * Copyright 2003 Sergio Anibal de Carvalho Junior
005 *
006 * This file is part of NeoBio.
007 *
008 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
009 * the GNU General Public License as published by the Free Software Foundation; either
010 * version 2 of the License, or (at your option) any later version.
011 *
012 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
013 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
014 * PURPOSE. See the GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License along with NeoBio;
017 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018 * Boston, MA 02111-1307, USA.
019 *
020 * Proper attribution of the author as the source of the software would be appreciated.
021 *
022 * Sergio Anibal de Carvalho Junior mailto:sergioanibaljr@users.sourceforge.net
023 * Department of Computer Science http://www.dcs.kcl.ac.uk
024 * King's College London, UK http://www.kcl.ac.uk
025 *
026 * Please visit http://neobio.sourceforge.net
027 *
028 * This project was supervised by Professor Maxime Crochemore.
029 *
030 */
031
032 package neobio.textui;
033
034 import java.io.BufferedWriter;
035 import java.io.Writer;
036 import java.io.FileWriter;
037 import java.io.OutputStreamWriter;
038 import java.io.IOException;
039
/**
 * This class is a simple command line based utility for generating random sequences.
 *
 * <P>The main method takes three parameters from the command line to generate a
 * sequence: <CODE>type</CODE>, <CODE>size</CODE> and <CODE>file</CODE>, where:
 * <UL>
 * <LI><B><CODE>type</CODE></B> is either <CODE>DNA</CODE> for DNA sequences or
 * <CODE>PROT</CODE> for protein sequences (case is ignored).
 * <LI><B><CODE>size</CODE></B> is the number of characters (must be at least 1).
 * <LI><B><CODE>file</CODE></B> (optional) is the name of a file (if omitted, sequence
 * is written to standard output).
 * </UL>
 * </P>
 *
 * <P>The process exit status is 0 on success, 1 if the command line arguments are
 * invalid and 2 if the output file cannot be opened or the sequence cannot be
 * written.</P>
 *
 * @author Sergio A. de Carvalho Jr.
 */
public class RandomSequenceGenerator
{
    /**
     * Character set for DNA sequences.
     */
    private static final char DNA_CHARS[] = {'A', 'C', 'G', 'T'};

    /**
     * Character set for protein sequences.
     */
    private static final char PROT_CHARS[] = {'A','R','N','D','C','Q','E','G','H','I',
                            'L','K','M','F','P','S','T','W','Y','V','B','Z','X'};

    /**
     * The main method takes three parameters from the command line to generate a
     * sequence. See the class description for details. This method always terminates
     * the virtual machine, using the status returned by {@link #run}.
     *
     * @param args command line arguments
     */
    public static void main (String args[])
    {
        System.exit(run(args));
    }

    /**
     * Parses the command line arguments, generates the random sequence and writes it
     * to the requested destination. Unlike {@link #main} this method never calls
     * <CODE>System.exit</CODE>; it is package-private so that it can be invoked
     * without terminating the virtual machine.
     *
     * @param args command line arguments (<CODE>type</CODE>, <CODE>size</CODE> and an
     * optional <CODE>file</CODE>)
     * @return 0 on success, 1 if the arguments are invalid (an explanation is printed
     * to standard error), or 2 if the output file could not be opened or written
     */
    static int run (String args[])
    {
        String  seq_type, filename;
        Writer  output;
        char    charset[];
        int     size;

        // 1st (sequence type) and 2nd (number of characters) arguments are required
        if (args.length < 2)
        {
            usage();
            return 1;
        }

        seq_type = args[0];

        try
        {
            size = Integer.parseInt(args[1]);
        }
        catch (NumberFormatException e)
        {
            usage();
            return 1;
        }

        // validate character set
        if (seq_type.equalsIgnoreCase("DNA"))
            charset = DNA_CHARS;
        else if (seq_type.equalsIgnoreCase("PROT"))
            charset = PROT_CHARS;
        else
        {
            // no such option
            usage();
            return 1;
        }

        // validate size (a sequence of a single character is acceptable)
        if (size < 1)
        {
            System.err.println ("Error: size must be at least 1.");
            return 1;
        }

        // 3rd argument (optional): file name
        filename = (args.length > 2) ? args[2] : null;

        if (filename != null)
        {
            try
            {
                // open file for writing
                output = new BufferedWriter (new FileWriter (filename));
            }
            catch (IOException e)
            {
                System.err.println ("Error: couldn't open " + filename + " for writing.");
                e.printStackTrace();
                return 2;
            }
        }
        else
        {
            // file name was omitted, use standard output (buffered, since the
            // sequence is written one character at a time)
            output = new BufferedWriter (new OutputStreamWriter (System.out));
        }

        try
        {
            try
            {
                writeSequence (output, charset, size);
                output.flush();
            }
            finally
            {
                // release the file even if writing failed; standard output is
                // deliberately left open
                if (filename != null) output.close();
            }
        }
        catch (IOException e)
        {
            System.err.println ("Error: failed to write sequence.");
            e.printStackTrace();
            return 2;
        }

        return 0;
    }

    /**
     * Writes <CODE>size</CODE> characters to <CODE>output</CODE>, each one picked at
     * random from <CODE>charset</CODE> using <CODE>Math.random()</CODE>. The writer
     * is neither flushed nor closed.
     *
     * @param output where the characters are written
     * @param charset the characters to choose from
     * @param size number of characters to write
     * @throws IOException if the writer fails
     */
    private static void writeSequence (Writer output, char charset[], int size)
        throws IOException
    {
        for (int i = 0; i < size; i++)
        {
            // choose a character randomly
            int index = (int) (Math.random() * charset.length);

            output.write(charset[index]);
        }
    }

    /**
     * Prints command line usage to standard error.
     */
    private static void usage ()
    {
        System.err.println(
        "\nUsage: RandomSequenceGenerator <type> <size> [<file>]\n\n" +
        "where:\n\n" +
        "   <type> = DNA for nucleotide sequences\n" +
        "            or PROT for protein sequences\n\n" +
        "   <size> = number of characters\n\n" +
        "   <file> = name of a file to where the sequence is to be written\n" +
        "            (if omitted, sequence is written to standard output)"
        );
    }
}