3838// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
3939// OF THE POSSIBILITY OF SUCH DAMAGE.
4040
41- // ignore-android doesn't terminate?
41+ // ignore-android see #10393 #13206
4242
43- #![ feature( slicing_syntax, asm , if_let , tuple_indexing ) ]
43+ #![ feature( slicing_syntax, unboxed_closures , overloaded_calls ) ]
4444
4545extern crate libc;
4646
4747use std:: io:: stdio:: { stdin_raw, stdout_raw} ;
48- use std:: sync:: { Future } ;
4948use std:: num:: { div_rem} ;
5049use std:: ptr:: { copy_memory} ;
5150use std:: io:: { IoResult , EndOfFile } ;
52- use std:: slice:: raw:: { mut_buf_as_slice} ;
5351
54- use shared_memory:: { SharedMemory } ;
55-
56- mod tables {
57- use std:: sync:: { Once , ONCE_INIT } ;
58-
59- /// Lookup tables.
60- static mut CPL16 : [ u16 , ..1 << 16 ] = [ 0 , ..1 << 16 ] ;
61- static mut CPL8 : [ u8 , ..1 << 8 ] = [ 0 , ..1 << 8 ] ;
62-
63- /// Generates the tables.
64- pub fn get ( ) -> Tables {
65- /// To make sure we initialize the tables only once.
66- static INIT : Once = ONCE_INIT ;
67- INIT . doit ( || {
68- unsafe {
69- for i in range ( 0 , 1 << 8 ) {
70- CPL8 [ i] = match i as u8 {
71- b'A' | b'a' => b'T' ,
72- b'C' | b'c' => b'G' ,
73- b'G' | b'g' => b'C' ,
74- b'T' | b't' => b'A' ,
75- b'U' | b'u' => b'A' ,
76- b'M' | b'm' => b'K' ,
77- b'R' | b'r' => b'Y' ,
78- b'W' | b'w' => b'W' ,
79- b'S' | b's' => b'S' ,
80- b'Y' | b'y' => b'R' ,
81- b'K' | b'k' => b'M' ,
82- b'V' | b'v' => b'B' ,
83- b'H' | b'h' => b'D' ,
84- b'D' | b'd' => b'H' ,
85- b'B' | b'b' => b'V' ,
86- b'N' | b'n' => b'N' ,
87- i => i,
88- } ;
89- }
90-
91- for ( i, v) in CPL16 . iter_mut ( ) . enumerate ( ) {
92- * v = * CPL8 . unsafe_get ( i & 255 ) as u16 << 8 |
93- * CPL8 . unsafe_get ( i >> 8 ) as u16 ;
94- }
95- }
96- } ) ;
97- Tables { _dummy : ( ) }
98- }
99-
100- /// Accessor for the static arrays.
101- ///
102- /// To make sure that the tables can't be accessed without having been initialized.
103- pub struct Tables {
104- _dummy : ( )
105- }
106-
107- impl Tables {
108- /// Retreives the complement for `i`.
109- pub fn cpl8 ( self , i : u8 ) -> u8 {
110- // Not really unsafe.
111- unsafe { CPL8 [ i as uint ] }
112- }
113-
114- /// Retreives the complement for `i`.
115- pub fn cpl16 ( self , i : u16 ) -> u16 {
116- unsafe { CPL16 [ i as uint ] }
117- }
118- }
/// Precomputed complement lookup tables, filled in by `Tables::new`.
52+ struct Tables {
/// One entry per byte value: `table8[b]` is `computed_cpl8(b)`.
53+ table8 : [ u8 , ..1 << 8 ] ,
/// One entry per 16-bit value: the complements of its two bytes, stored
/// with the bytes in swapped order.
54+ table16 : [ u16 , ..1 << 16 ]
11955}
12056
121- mod shared_memory {
122- use std:: sync:: { Arc } ;
123- use std:: mem:: { transmute} ;
124- use std:: raw:: { Slice } ;
125-
126- /// Structure for sharing disjoint parts of a vector mutably across tasks.
127- pub struct SharedMemory {
128- ptr : Arc < Vec < u8 > > ,
129- start : uint ,
130- len : uint ,
131- }
132-
133- impl SharedMemory {
134- pub fn new ( ptr : Vec < u8 > ) -> SharedMemory {
135- let len = ptr. len ( ) ;
136- SharedMemory {
137- ptr : Arc :: new ( ptr) ,
138- start : 0 ,
139- len : len,
140- }
57+ impl Tables {
/// Builds both lookup tables from scratch.
///
/// `table8` is filled first because `table16` is derived from it.
58+ fn new ( ) -> Tables {
59+ let mut table8 = [ 0 , ..1 << 8 ] ;
// Entry `i` of the 8-bit table is the complement of byte `i`.
60+ for ( i, v) in table8. iter_mut ( ) . enumerate ( ) {
61+ * v = Tables :: computed_cpl8 ( i as u8 ) ;
14162 }
142-
143- pub fn as_mut_slice ( & mut self ) -> & mut [ u8 ] {
144- unsafe {
145- transmute ( Slice {
146- data : self . ptr . as_ptr ( ) . offset ( self . start as int ) as * const u8 ,
147- len : self . len ,
148- } )
149- }
63+ let mut table16 = [ 0 , ..1 << 16 ] ;
// Entry `i` of the 16-bit table: the complement of the low byte of `i`
// goes into the high byte of the result and the complement of the high
// byte goes into the low byte, so the pair is complemented and swapped
// in a single lookup.
64+ for ( i, v) in table16. iter_mut ( ) . enumerate ( ) {
65+ * v = table8[ i & 255 ] as u16 << 8 |
66+ table8[ i >> 8 ] as u16 ;
15067 }
68+ Tables { table8 : table8, table16 : table16 }
69+ }
15170
152- pub fn len ( & self ) -> uint {
153- self . len
/// Computes the complement of the single byte `c`.
///
/// The listed letters are matched in either case and always map to an
/// upper-case result (e.g. `a` and `A` both give `T`). Any byte that is not
/// listed is returned unchanged.
71+ fn computed_cpl8 ( c : u8 ) -> u8 {
72+ match c {
73+ b'A' | b'a' => b'T' ,
74+ b'C' | b'c' => b'G' ,
75+ b'G' | b'g' => b'C' ,
76+ b'T' | b't' => b'A' ,
77+ b'U' | b'u' => b'A' ,
78+ b'M' | b'm' => b'K' ,
79+ b'R' | b'r' => b'Y' ,
80+ b'W' | b'w' => b'W' ,
81+ b'S' | b's' => b'S' ,
82+ b'Y' | b'y' => b'R' ,
83+ b'K' | b'k' => b'M' ,
84+ b'V' | b'v' => b'B' ,
85+ b'H' | b'h' => b'D' ,
86+ b'D' | b'd' => b'H' ,
87+ b'B' | b'b' => b'V' ,
88+ b'N' | b'n' => b'N' ,
// Everything else (including `\n`) passes through untouched.
89+ i => i,
15490 }
91+ }
15592
156- pub fn split_at ( self , mid : uint ) -> ( SharedMemory , SharedMemory ) {
157- assert ! ( mid <= self . len) ;
158- let left = SharedMemory {
159- ptr : self . ptr . clone ( ) ,
160- start : self . start ,
161- len : mid,
162- } ;
163- let right = SharedMemory {
164- ptr : self . ptr ,
165- start : self . start + mid,
166- len : self . len - mid,
167- } ;
168- ( left, right)
169- }
93+ /// Retrieves the complement of the byte `i` from the 8-bit table.
94+ fn cpl8 ( & self , i : u8 ) -> u8 {
95+ self . table8 [ i as uint ]
96+ }
17097
171- /// Resets the object so that it covers the whole range of the contained vector.
172- ///
173- /// You must not call this method if `self` is not the only reference to the
174- /// shared memory.
175- ///
176- /// FIXME: If `Arc` had a method to check if the reference is unique, then we
177- /// wouldn't need the `unsafe` here.
178- ///
179- /// FIXME: If `Arc` had a method to unwrap the contained value, then we could
180- /// simply unwrap here.
181- pub unsafe fn reset ( self ) -> SharedMemory {
182- let len = self . ptr . len ( ) ;
183- SharedMemory {
184- ptr : self . ptr ,
185- start : 0 ,
186- len : len,
187- }
188- }
98+ /// Retrieves the complement of the byte pair `i` from the 16-bit table.
///
/// The returned value holds the complements of both bytes of `i` with the
/// two bytes swapped.
99+ fn cpl16 ( & self , i : u16 ) -> u16 {
100+ self . table16 [ i as uint ]
189101 }
190102}
191103
192-
193104/// Reads all remaining bytes from the stream.
194105fn read_to_end < R : Reader > ( r : & mut R ) -> IoResult < Vec < u8 > > {
106+ // Reading the input stream into memory is a bottleneck, so we tune
107+ // Reader::read_to_end() with a fast-growing capacity policy to limit
108+ // re-copies. If MREMAP_RETAIN is implemented in the Linux kernel
109+ // and jemalloc uses it, this trick will become unnecessary.
195110 const CHUNK : uint = 64 * 1024 ;
196111
197- let mut vec = Vec :: with_capacity ( 1024 * 1024 ) ;
112+ let mut vec = Vec :: with_capacity ( CHUNK ) ;
198113 loop {
114+ // workaround: very fast growing
199115 if vec. capacity ( ) - vec. len ( ) < CHUNK {
200116 let cap = vec. capacity ( ) ;
201117 let mult = if cap < 256 * 1024 * 1024 {
202- // FIXME (mahkoh): Temporary workaround for jemalloc on linux. Replace
203- // this by 2x once the jemalloc preformance issue has been fixed.
204118 16
205119 } else {
206120 2
207121 } ;
208122 vec. reserve_exact ( mult * cap) ;
209123 }
210- unsafe {
211- let ptr = vec. as_mut_ptr ( ) . offset ( vec. len ( ) as int ) ;
212- match mut_buf_as_slice ( ptr, CHUNK , |s| r. read ( s) ) {
213- Ok ( n) => {
214- let len = vec. len ( ) ;
215- vec. set_len ( len + n) ;
216- } ,
217- Err ( ref e) if e. kind == EndOfFile => break ,
218- Err ( e) => return Err ( e) ,
219- }
124+ match r. push_at_least ( 1 , CHUNK , & mut vec) {
125+ Ok ( _) => { }
126+ Err ( ref e) if e. kind == EndOfFile => break ,
127+ Err ( e) => return Err ( e)
220128 }
221129 }
222130 Ok ( vec)
@@ -225,11 +133,8 @@ fn read_to_end<R: Reader>(r: &mut R) -> IoResult<Vec<u8>> {
225133/// Finds the first position at which the byte `n` occurs in `h`.
226134fn memchr ( h : & [ u8 ] , n : u8 ) -> Option < uint > {
227135 use libc:: { c_void, c_int, size_t} ;
228- extern {
229- fn memchr ( h : * const c_void , n : c_int , s : size_t ) -> * mut c_void ;
230- }
231136 let res = unsafe {
232- memchr ( h. as_ptr ( ) as * const c_void , n as c_int , h. len ( ) as size_t )
137+ libc :: memchr ( h. as_ptr ( ) as * const c_void , n as c_int , h. len ( ) as size_t )
233138 } ;
234139 if res. is_null ( ) {
235140 None
@@ -238,13 +143,36 @@ fn memchr(h: &[u8], n: u8) -> Option<uint> {
238143 }
239144}
240145
146+ /// A mutable iterator over DNA sequences
///
/// `s` is the part of the buffer that has not been handed out yet; each
/// call to `next` carves one sequence off its front.
147+ struct MutDnaSeqs < ' a > { s : & ' a mut [ u8 ] }
/// Creates a `MutDnaSeqs` that starts at the beginning of the buffer `s`.
148+ fn mut_dna_seqs < ' a > ( s : & ' a mut [ u8 ] ) -> MutDnaSeqs < ' a > {
149+ MutDnaSeqs { s : s }
150+ }
151+ impl < ' a > Iterator < & ' a mut [ u8 ] > for MutDnaSeqs < ' a > {
/// Yields the next sequence body as a mutable slice.
///
/// Everything up to and including the first `\n` is skipped (presumably
/// the `>` header line of the record), and the yielded slice runs from
/// there up to, but not including, the next `>` byte, or to the end of
/// the buffer when there is none. Returns `None` once no `\n` is left.
152+ fn next ( & mut self ) -> Option < & ' a mut [ u8 ] > {
// Move the remaining buffer out of `self`, leaving an empty slice behind,
// so it can be split into slices that carry the full `'a` lifetime.
153+ let tmp = std:: mem:: replace ( & mut self . s , & mut [ ] ) ;
154+ let tmp = match memchr ( tmp, b'\n' ) {
155+ Some ( i) => tmp. slice_from_mut ( i + 1 ) ,
// No newline left: iteration is over and `self.s` stays empty.
156+ None => return None ,
157+ } ;
158+ let ( seq, tmp) = match memchr ( tmp, b'>' ) {
159+ Some ( i) => tmp. split_at_mut ( i) ,
160+ None => {
// Last record: the sequence is everything that remains.
161+ let len = tmp. len ( ) ;
162+ tmp. split_at_mut ( len)
163+ }
164+ } ;
// Keep the remainder (starting at the next `>`, if any) for the next call.
165+ self . s = tmp;
166+ Some ( seq)
167+ }
168+ }
169+
241170/// Length of a normal line without the terminating \n.
242171const LINE_LEN : uint = 60 ;
243172
244173/// Compute the reverse complement.
245- fn reverse_complement ( mut view : SharedMemory , tables : tables:: Tables ) {
246- // Drop the last newline
247- let seq = view. as_mut_slice ( ) . init_mut ( ) ;
174+ fn reverse_complement ( seq : & mut [ u8 ] , tables : & Tables ) {
175+ let seq = seq. init_mut ( ) ; // Drop the last newline
248176 let len = seq. len ( ) ;
249177 let off = LINE_LEN - len % ( LINE_LEN + 1 ) ;
250178 let mut i = LINE_LEN ;
@@ -290,34 +218,36 @@ fn reverse_complement(mut view: SharedMemory, tables: tables::Tables) {
290218 }
291219}
292220
293- fn main ( ) {
294- let mut data = SharedMemory :: new ( read_to_end ( & mut stdin_raw ( ) ) . unwrap ( ) ) ;
295- let tables = tables:: get ( ) ;
296-
297- let mut futures = vec ! ( ) ;
298- loop {
299- let ( _, mut tmp_data) = match memchr ( data. as_mut_slice ( ) , b'\n' ) {
300- Some ( i) => data. split_at ( i + 1 ) ,
301- _ => break ,
302- } ;
303- let ( view, tmp_data) = match memchr ( tmp_data. as_mut_slice ( ) , b'>' ) {
304- Some ( i) => tmp_data. split_at ( i) ,
305- None => {
306- let len = tmp_data. len ( ) ;
307- tmp_data. split_at ( len)
308- } ,
309- } ;
310- futures. push ( Future :: spawn ( proc ( ) reverse_complement ( view, tables) ) ) ;
311- data = tmp_data;
312- }
313-
314- for f in futures. iter_mut ( ) {
315- f. get ( ) ;
221+ /// Executes a closure in parallel over the mutable slices yielded by an iterator.
222+ /// One task is spawned per element of `iter`, and `f` is called on that element.
///
/// The function only returns after the loop over `rx` at the end finishes;
/// that loop is expected to end once every clone of `tx` has been dropped,
/// i.e. once every spawned task is done with its chunk.
223+ fn parallel < ' a , I , T , F > ( mut iter : I , f : F )
224+ where T : Send + Sync ,
225+ I : Iterator < & ' a mut [ T ] > ,
226+ F : Fn ( & ' a mut [ T ] ) + Sync {
227+ use std:: mem;
228+ use std:: raw:: Repr ;
229+
// The channel never carries a value; it is used only to detect when all
// tasks have finished (each task owns one sender and drops it at the end).
230+ let ( tx, rx) = channel ( ) ;
231+ for chunk in iter {
232+ let tx = tx. clone ( ) ;
233+
234+ // Need to convert `f` and `chunk` to something that can cross the task
235+ // boundary.
// `f` is smuggled as a raw pointer cast to `*const uint`, and the slice as
// its raw representation from `repr()`.
236+ let f = & f as * const F as * const uint ;
237+ let raw = chunk. repr ( ) ;
238+ spawn ( proc ( ) {
239+ let f = f as * const F ;
// NOTE(review): this dereference and transmute look sound only because
// `parallel` waits on `rx` before returning, so `f` and the chunk should
// still be alive here -- confirm.
240+ unsafe { ( * f) ( mem:: transmute ( raw) ) }
241+ drop ( tx)
242+ } ) ;
316243 }
// Drop the original sender so that only the per-task clones remain.
244+ drop ( tx) ;
// Wait here until the receiver's iterator ends.
245+ for ( ) in rx. iter ( ) { }
246+ }
317247
318- // Not actually unsafe. If Arc had a way to check uniqueness then we could do that in
319- // `reset` and it would tell us that, yes, it is unique at this point.
320- data = unsafe { data . reset ( ) } ;
321-
/// Reads all of stdin, reverse-complements every sequence in place and in
/// parallel, then writes the whole buffer to stdout.
///
/// Panics (via `unwrap`) if reading stdin or writing stdout fails.
248+ fn main ( ) {
249+ let mut data = read_to_end ( & mut stdin_raw ( ) ) . unwrap ( ) ;
// Built once; the closure below hands the same reference to every
// `reverse_complement` call.
250+ let tables = & Tables :: new ( ) ;
// Each slice yielded by `mut_dna_seqs` is passed to `reverse_complement`.
251+ parallel ( mut_dna_seqs ( data [ mut ] ) , | & : seq| reverse_complement ( seq , tables ) ) ;
322252 stdout_raw ( ) . write ( data. as_mut_slice ( ) ) . unwrap ( ) ;
323253}
0 commit comments