@@ -21,48 +21,121 @@ const MAXDCODES = 30;
2121const MAXCODES = MAXLCODES + MAXDCODES ;
2222const FIXLCODES = 288 ;
2323
24+ // The maximum length of a Huffman code's prefix we can decode using the fast
25+ // path. The factor 9 is inherited from Zlib, tweaking the value showed little
26+ // or no changes in the profiler output.
27+ const PREFIX_LUT_BITS = 9 ;
28+
// Decoding tables for one canonical Huffman code.
//
// `construct` fills every field from a list of per-symbol code lengths; the
// decoder then uses `prefix_lut`/`prefix_lut_len` as a fast path and falls back
// to `count`/`symbol` (bootstrapped by `last_code`/`last_index`) otherwise.
const Huffman = struct {
    // Number of codes for each possible length
    count: [MAXBITS + 1]u16,
    // Mapping between codes and symbols
    symbol: [MAXCODES]u16,

    // The decoding process uses a trick explained by Mark Adler in [1].
    // We basically precompute for a fixed number of codes (0 <= x <= 2^N-1)
    // the symbol and the effective code length we'd get if the decoder was run
    // on the given N-bit sequence.
    // A code with length 0 means the sequence is not a valid prefix for this
    // canonical Huffman code and we have to decode it using a slower method.
    //
    // [1] https://github.com/madler/zlib/blob/v1.2.11/doc/algorithm.txt#L58
    prefix_lut: [1 << PREFIX_LUT_BITS]u16,
    prefix_lut_len: [1 << PREFIX_LUT_BITS]u16,
    // The following info refer to the codes of length PREFIX_LUT_BITS+1 and are
    // used to bootstrap the bit-by-bit reading method if the fast-path fails.
    last_code: u16,
    last_index: u16,

    // Build the decoding tables from `code_length`, where `code_length[s]` is
    // the length in bits of the code assigned to symbol `s` (0 = unused).
    //
    // Returns error.InvalidTree when the lengths over-subscribe the code
    // space. Every entry of `code_length` is used as an index into `count`,
    // so it must not exceed MAXBITS.
    fn construct(self: *Huffman, code_length: []const u16) !void {
        for (self.count) |*val| {
            val.* = 0;
        }

        for (code_length) |len| {
            self.count[len] += 1;
        }

        // Put the fast-path table and the slow-path bootstrap values in a
        // defined state before any return below: the decoder reads them
        // unconditionally, so leaving them undefined for an empty or invalid
        // code would make it act on garbage.
        self.prefix_lut_len = mem.zeroes(@TypeOf(self.prefix_lut_len));
        self.last_code = 0;
        self.last_index = 0;

        // All zero.
        if (self.count[0] == code_length.len)
            return;

        var left: isize = 1;
        for (self.count[1..]) |val| {
            // Each added bit doubles the amount of codes.
            left *= 2;
            // Make sure the number of codes with this length isn't too high.
            left -= @as(isize, @bitCast(i16, val));
            if (left < 0)
                return error.InvalidTree;
        }

        // Compute the offset of the first symbol represented by a code of a
        // given length in the symbol table, together with the first canonical
        // Huffman code for that length.
        var offset: [MAXBITS + 1]u16 = undefined;
        var codes: [MAXBITS + 1]u16 = undefined;
        {
            offset[1] = 0;
            codes[1] = 0;
            var len: usize = 1;
            while (len < MAXBITS) : (len += 1) {
                offset[len + 1] = offset[len] + self.count[len];
                codes[len + 1] = (codes[len] + self.count[len]) << 1;
            }
        }

        for (code_length) |len, symbol| {
            if (len != 0) {
                // Fill the symbol table.
                // The symbols are assigned sequentially for each length.
                self.symbol[offset[len]] = @truncate(u16, symbol);
                // Track the last assigned offset
                offset[len] += 1;
            }

            if (len == 0 or len > PREFIX_LUT_BITS)
                continue;

            // Given a Huffman code of length N we have to massage it so
            // that it becomes an index in the lookup table.
            // The bit order is reversed as the fast path reads the bit
            // sequence MSB to LSB using an &, the order is flipped wrt the
            // one obtained by reading bit-by-bit.
            // The codes are prefix-free, if the prefix matches we can
            // safely ignore the trail bits. We do so by replicating the
            // symbol info for each combination of the trailing bits.
            const bits_to_fill = @intCast(u5, PREFIX_LUT_BITS - len);
            const rev_code = bitReverse(codes[len], len);
            // Track the last used code, but only for lengths < PREFIX_LUT_BITS
            codes[len] += 1;

            var j: usize = 0;
            while (j < @as(usize, 1) << bits_to_fill) : (j += 1) {
                const index = rev_code | (j << @intCast(u5, len));
                assert(self.prefix_lut_len[index] == 0);
                self.prefix_lut[index] = @truncate(u16, symbol);
                self.prefix_lut_len[index] = @truncate(u16, len);
            }
        }

        // `codes` is only advanced for lengths covered by the table, so this
        // is still the first code of length PREFIX_LUT_BITS+1. `offset` was
        // advanced once per symbol of that length, undo that to get the index
        // of the first one.
        self.last_code = codes[PREFIX_LUT_BITS + 1];
        self.last_index = offset[PREFIX_LUT_BITS + 1] - self.count[PREFIX_LUT_BITS + 1];
    }
};
65128
// Reverse the order of the N least significant bits of `x`.
//
// Bit 0 of `x` becomes bit N-1 of the result, bit 1 becomes bit N-2 and so
// on. Bits of `x` at position N and above are ignored, N == 0 yields 0.
// N is expected to be at most the bit width of usize.
fn bitReverse(x: usize, N: usize) usize {
    var reversed: usize = 0;
    var remaining = x;
    var i: usize = 0;
    while (i < N) : (i += 1) {
        // Pop the lowest remaining input bit and push it onto the low end of
        // the result; after N rounds the first bit popped sits at bit N-1.
        // Shifting by a constant avoids any narrowing cast of the shift
        // amount.
        reversed = (reversed << 1) | (remaining & 1);
        remaining >>= 1;
    }
    return reversed;
}
138+
66139pub fn InflateStream (comptime ReaderType : type ) type {
67140 return struct {
68141 const Self = @This ();
@@ -83,7 +156,7 @@ pub fn InflateStream(comptime ReaderType: type) type {
83156 };
84157 pub const Reader = io .Reader (* Self , Error , read );
85158
86- bit_reader : io . BitReader ( .Little , ReaderType ) ,
159+ inner_reader : ReaderType ,
87160
88161 // True if the decoder met the end of the compressed stream, no further
89162 // data can be decompressed
@@ -135,7 +208,7 @@ pub fn InflateStream(comptime ReaderType: type) type {
135208
136209 // Insert a single byte into the window.
137210 // Assumes there's enough space.
138- fn appendUnsafe (self : * WSelf , value : u8 ) void {
211+ inline fn appendUnsafe (self : * WSelf , value : u8 ) void {
139212 self .buf [self .wi ] = value ;
140213 self .wi = (self .wi + 1 ) & (self .buf .len - 1 );
141214 self .el += 1 ;
@@ -180,7 +253,7 @@ pub fn InflateStream(comptime ReaderType: type) type {
180253 // of the window memory for the non-overlapping case.
181254 var i : usize = 0 ;
182255 while (i < N ) : (i += 1 ) {
183- const index = (self .wi -% distance ) % self .buf .len ;
256+ const index = (self .wi -% distance ) & ( self .buf .len - 1 ) ;
184257 self .appendUnsafe (self .buf [index ]);
185258 }
186259
@@ -196,13 +269,36 @@ pub fn InflateStream(comptime ReaderType: type) type {
196269 hdist : * Huffman ,
197270 hlen : * Huffman ,
198271
272+ // Temporary buffer for the bitstream, only bits 0..`bits_left` are
273+ // considered valid.
274+ bits : u32 ,
275+ bits_left : usize ,
276+
// Return the next `bits` bits of the stream without consuming them.
//
// The bit buffer is topped up one byte at a time from `inner_reader` until
// it holds at least `bits` bits; any error from `readByte` is propagated.
// The result has the first stream bit in its least significant position.
fn peekBits(self: *Self, bits: usize) !u32 {
    while (self.bits_left < bits) {
        // New bytes are stacked above the bits that are still pending.
        const next: u32 = try self.inner_reader.readByte();
        self.bits |= next << @intCast(u5, self.bits_left);
        self.bits_left += 8;
    }
    // Keep only the `bits` lowest bits of the buffer.
    const mask = (@as(u32, 1) << @intCast(u5, bits)) - 1;
    return self.bits & mask;
}
// Read and consume the next `bits` bits of the stream.
//
// Any error raised by `peekBits` is returned as-is and, in that case,
// nothing is consumed from the bit buffer.
fn readBits(self: *Self, bits: usize) !u32 {
    // Bail out before touching the buffer: if the refill failed there are
    // fewer than `bits` bits available and discarding them would underflow
    // `bits_left`.
    const val = try self.peekBits(bits);
    self.discardBits(bits);
    return val;
}
// Drop the `bits` lowest bits from the bit buffer.
//
// The caller must make sure at least `bits` bits are buffered, the count
// of valid bits is decremented without any further check.
fn discardBits(self: *Self, bits: usize) void {
    const shift = @intCast(u5, bits);
    self.bits_left -= bits;
    self.bits >>= shift;
}
294+
199295 fn stored (self : * Self ) ! void {
200296 // Discard the remaining bits, the length field is always
201297 // byte-aligned (and so is the data)
202- self .bit_reader . alignToByte ( );
298+ self .discardBits ( self . bits_left );
203299
204- const length = ( try self .bit_reader . readBitsNoEof (u16 , 16 ) );
205- const length_cpl = ( try self .bit_reader . readBitsNoEof (u16 , 16 ) );
300+ const length = try self .inner_reader . readIntLittle (u16 );
301+ const length_cpl = try self .inner_reader . readIntLittle (u16 );
206302
207303 if (length != ~ length_cpl )
208304 return error .InvalidStoredSize ;
@@ -237,11 +333,11 @@ pub fn InflateStream(comptime ReaderType: type) type {
237333
238334 fn dynamic (self : * Self ) ! void {
239335 // Number of length codes
240- const nlen = (try self .bit_reader . readBitsNoEof ( usize , 5 )) + 257 ;
336+ const nlen = (try self .readBits ( 5 )) + 257 ;
241337 // Number of distance codes
242- const ndist = (try self .bit_reader . readBitsNoEof ( usize , 5 )) + 1 ;
338+ const ndist = (try self .readBits ( 5 )) + 1 ;
243339 // Number of code length codes
244- const ncode = (try self .bit_reader . readBitsNoEof ( usize , 4 )) + 4 ;
340+ const ncode = (try self .readBits ( 4 )) + 4 ;
245341
246342 if (nlen > MAXLCODES or ndist > MAXDCODES )
247343 return error .BadCounts ;
@@ -259,7 +355,7 @@ pub fn InflateStream(comptime ReaderType: type) type {
259355
260356 // Read the code lengths, missing ones are left as zero
261357 for (ORDER [0.. ncode ]) | val | {
262- lengths [val ] = try self .bit_reader . readBitsNoEof ( u16 , 3 );
358+ lengths [val ] = @intCast ( u16 , try self .readBits ( 3 ) );
263359 }
264360
265361 try lencode .construct (lengths [0.. ]);
@@ -284,19 +380,19 @@ pub fn InflateStream(comptime ReaderType: type) type {
284380 if (i == 0 ) return error .NoLastLength ;
285381
286382 const last_length = lengths [i - 1 ];
287- const repeat = 3 + (try self .bit_reader . readBitsNoEof ( usize , 2 ));
383+ const repeat = 3 + (try self .readBits ( 2 ));
288384 const last_index = i + repeat ;
289385 while (i < last_index ) : (i += 1 ) {
290386 lengths [i ] = last_length ;
291387 }
292388 },
293389 17 = > {
294390 // repeat zero 3..10 times
295- i += 3 + (try self .bit_reader . readBitsNoEof ( usize , 3 ));
391+ i += 3 + (try self .readBits ( 3 ));
296392 },
297393 18 = > {
298394 // repeat zero 11..138 times
299- i += 11 + (try self .bit_reader . readBitsNoEof ( usize , 7 ));
395+ i += 11 + (try self .readBits ( 7 ));
300396 },
301397 else = > return error .InvalidSymbol ,
302398 }
@@ -359,11 +455,11 @@ pub fn InflateStream(comptime ReaderType: type) type {
359455 // Length/distance pair
360456 const length_symbol = symbol - 257 ;
361457 const length = LENS [length_symbol ] +
362- try self .bit_reader . readBitsNoEof ( u16 , LEXT [length_symbol ]);
458+ @intCast ( u16 , try self .readBits ( LEXT [length_symbol ]) );
363459
364460 const distance_symbol = try self .decode (distcode );
365461 const distance = DISTS [distance_symbol ] +
366- try self .bit_reader . readBitsNoEof ( u16 , DEXT [distance_symbol ]);
462+ @intCast ( u16 , try self .readBits ( DEXT [distance_symbol ]) );
367463
368464 if (distance > self .window .buf .len )
369465 return error .InvalidDistance ;
@@ -385,13 +481,29 @@ pub fn InflateStream(comptime ReaderType: type) type {
385481 }
386482
387483 fn decode (self : * Self , h : * Huffman ) ! u16 {
388- var len : usize = 1 ;
389- var code : usize = 0 ;
390- var first : usize = 0 ;
391- var index : usize = 0 ;
484+ // Fast path, read some bits and hope they're prefixes of some code
485+ const prefix = try self .peekBits (PREFIX_LUT_BITS );
486+ if (h .prefix_lut_len [prefix ] != 0 ) {
487+ self .discardBits (h .prefix_lut_len [prefix ]);
488+ return h .prefix_lut [prefix ];
489+ }
490+
491+ // The sequence we've read is not a prefix of any code of length <=
492+ // PREFIX_LUT_BITS, keep decoding it using a slower method
493+ self .discardBits (PREFIX_LUT_BITS );
494+
495+ // Speed up the decoding by starting from the first code length
496+ // that's not covered by the table
497+ var len : usize = PREFIX_LUT_BITS + 1 ;
498+ var first : usize = h .last_code ;
499+ var index : usize = h .last_index ;
500+
501+ // Reverse the prefix so that the LSB becomes the MSB and make space
502+ // for the next bit
503+ var code = bitReverse (prefix , PREFIX_LUT_BITS + 1 );
392504
393505 while (len <= MAXBITS ) : (len += 1 ) {
394- code |= try self .bit_reader . readBitsNoEof ( usize , 1 );
506+ code |= try self .readBits ( 1 );
395507 const count = h .count [len ];
396508 if (code < first + count )
397509 return h .symbol [index + (code - first )];
@@ -411,8 +523,8 @@ pub fn InflateStream(comptime ReaderType: type) type {
411523 // The compressed stream is done
412524 if (self .seen_eos ) return ;
413525
414- const last = try self .bit_reader . readBitsNoEof ( u1 , 1 );
415- const kind = try self .bit_reader . readBitsNoEof ( u2 , 2 );
526+ const last = @intCast ( u1 , try self .readBits ( 1 ) );
527+ const kind = @intCast ( u2 , try self .readBits ( 2 ) );
416528
417529 self .seen_eos = last != 0 ;
418530
@@ -439,7 +551,7 @@ pub fn InflateStream(comptime ReaderType: type) type {
439551 var i : usize = 0 ;
440552 while (i < N ) : (i += 1 ) {
441553 var tmp : [1 ]u8 = undefined ;
442- if ((try self .bit_reader .read (& tmp )) != 1 ) {
554+ if ((try self .inner_reader .read (& tmp )) != 1 ) {
443555 // Unexpected end of stream, keep this error
444556 // consistent with the one readBits propagates from readByte
445557 return error .EndOfStream ;
@@ -478,12 +590,14 @@ pub fn InflateStream(comptime ReaderType: type) type {
478590 assert (math .isPowerOfTwo (window_slice .len ));
479591
480592 return Self {
481- .bit_reader = io . bitReader ( .Little , source ) ,
593+ .inner_reader = source ,
482594 .window = .{ .buf = window_slice },
483595 .seen_eos = false ,
484596 .state = .DecodeBlockHeader ,
485597 .hdist = undefined ,
486598 .hlen = undefined ,
599+ .bits = 0 ,
600+ .bits_left = 0 ,
487601 };
488602 }
489603
0 commit comments