diff options
Diffstat (limited to 'lib/zlib/inffast.c')
-rw-r--r-- | lib/zlib/inffast.c | 176 |
1 files changed, 111 insertions, 65 deletions
diff --git a/lib/zlib/inffast.c b/lib/zlib/inffast.c index 5e2a65ad4d2..e3c7f3b892b 100644 --- a/lib/zlib/inffast.c +++ b/lib/zlib/inffast.c @@ -1,5 +1,5 @@ /* inffast.c -- fast decoding - * Copyright (C) 1995-2008, 2010, 2013 Mark Adler + * Copyright (C) 1995-2004 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -12,6 +12,25 @@ #ifndef ASMINF +/* Allow machine dependent optimization for post-increment or pre-increment. + Based on testing to date, + Pre-increment preferred for: + - PowerPC G3 (Adler) + - MIPS R5000 (Randers-Pehrson) + Post-increment preferred for: + - none + No measurable difference: + - Pentium III (Anderson) + - M68060 (Nikl) + */ +#ifdef POSTINC +# define OFF 0 +# define PUP(a) *(a)++ +#else +# define OFF 1 +# define PUP(a) *++(a) +#endif + /* Decode literal, length, and distance codes and write out the resulting literal and match bytes until either not enough input or output is @@ -47,13 +66,12 @@ requires strm->avail_out >= 258 for each loop to avoid checking for output space. */ -void ZLIB_INTERNAL inflate_fast(strm, start) -z_streamp strm; -unsigned start; /* inflate()'s starting value for strm->avail_out */ +void inflate_fast(z_streamp strm, unsigned start) +/* start: inflate()'s starting value for strm->avail_out */ { struct inflate_state FAR *state; - z_const unsigned char FAR *in; /* local strm->next_in */ - z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *in; /* local strm->next_in */ + unsigned char FAR *last; /* while in < last, enough input available */ unsigned char FAR *out; /* local strm->next_out */ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ unsigned char FAR *end; /* while out < end, enough space available */ @@ -62,7 +80,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ #endif unsigned wsize; /* window size or zero if not using window */ unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ + unsigned write; /* window write index */ unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ unsigned long hold; /* local strm->hold */ unsigned bits; /* local strm->bits */ @@ -70,7 +88,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ code const FAR *dcode; /* local strm->distcode */ unsigned lmask; /* mask for first level of length codes */ unsigned dmask; /* mask for first level of distance codes */ - code here; /* retrieved table entry */ + code this; /* retrieved table entry */ unsigned op; /* code bits, operation, extra bits, or */ /* window position, window bytes to copy */ unsigned len; /* match length, unused bytes */ @@ -79,7 +97,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ /* copy state to local variables */ state = (struct inflate_state FAR *)strm->state; - in = strm->next_in; + in = strm->next_in - OFF; last = in + (strm->avail_in - 5); if (in > last && strm->avail_in > 5) { /* @@ -89,7 +107,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ strm->avail_in = 0xffffffff - (uintptr_t)in; last = in + (strm->avail_in - 5); } - out = strm->next_out; + out = strm->next_out - OFF; beg = out - (start - strm->avail_out); end = out + (strm->avail_out - 257); #ifdef INFLATE_STRICT @@ -97,7 +115,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ #endif wsize = state->wsize; whave = state->whave; - wnext = state->wnext; + write = state->write; window = state->window; hold = state->hold; bits = state->bits; @@ -110,29 +128,29 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ input data or output space */ do { if (bits < 15) { - hold += (unsigned long)(*in++) << bits; + hold += (unsigned long)(PUP(in)) << bits; bits += 8; - hold += (unsigned long)(*in++) << bits; + hold += (unsigned long)(PUP(in)) << bits; bits += 8; } - here = lcode[hold & lmask]; + this = lcode[hold & lmask]; dolen: - op = (unsigned)(here.bits); + op = (unsigned)(this.bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(this.op); if (op == 0) { /* literal */ - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - *out++ = (unsigned char)(here.val); + "inflate: literal 0x%02x\n", this.val)); + PUP(out) = (unsigned char)(this.val); } else if (op & 16) { /* length base */ - len = (unsigned)(here.val); + len = (unsigned)(this.val); op &= 15; /* number of extra bits */ if (op) { if (bits < op) { - hold += (unsigned long)(*in++) << bits; + hold += (unsigned long)(PUP(in)) << bits; bits += 8; } len += (unsigned)hold & ((1U << op) - 1); @@ -141,25 +159,25 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } Tracevv((stderr, "inflate: length %u\n", len)); if (bits < 15) { - hold += (unsigned long)(*in++) << bits; + hold += (unsigned long)(PUP(in)) << bits; bits += 8; - hold += (unsigned long)(*in++) << bits; + hold += (unsigned long)(PUP(in)) << bits; bits += 8; } - here = dcode[hold & dmask]; + this = dcode[hold & dmask]; dodist: - op = (unsigned)(here.bits); + op = (unsigned)(this.bits); hold >>= op; bits -= op; - op = (unsigned)(here.op); + op = (unsigned)(this.op); if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); + dist = (unsigned)(this.val); op &= 15; /* number of extra bits */ if (bits < op) { - hold += (unsigned long)(*in++) << bits; + hold += (unsigned long)(PUP(in)) << bits; bits += 8; if (bits < op) { - hold += (unsigned long)(*in++) << bits; + hold += (unsigned long)(PUP(in)) << bits; bits += 8; } } @@ -178,80 +196,108 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ if (dist > op) { /* see if copy from window */ op = dist - op; /* distance back in window */ if (op > whave) { - strm->msg = - (char *)"invalid distance too far back"; + strm->msg = (char *)"invalid distance too far back"; state->mode = BAD; break; } - from = window; - if (wnext == 0) { /* very common case */ + from = window - OFF; + if (write == 0) { /* very common case */ from += wsize - op; if (op < len) { /* some from window */ len -= op; do { - *out++ = *from++; + PUP(out) = PUP(from); } while (--op); from = out - dist; /* rest from output */ } } - else if (wnext < op) { /* wrap around window */ - from += wsize + wnext - op; - op -= wnext; + else if (write < op) { /* wrap around window */ + from += wsize + write - op; + op -= write; if (op < len) { /* some from end of window */ len -= op; do { - *out++ = *from++; + PUP(out) = PUP(from); } while (--op); - from = window; - if (wnext < len) { /* some from start of window */ - op = wnext; + from = window - OFF; + if (write < len) { /* some from start of window */ + op = write; len -= op; do { - *out++ = *from++; + PUP(out) = PUP(from); } while (--op); from = out - dist; /* rest from output */ } } } else { /* contiguous in window */ - from += wnext - op; + from += write - op; if (op < len) { /* some from window */ len -= op; do { - *out++ = *from++; + PUP(out) = PUP(from); } while (--op); from = out - dist; /* rest from output */ } } while (len > 2) { - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); len -= 3; } if (len) { - *out++ = *from++; + PUP(out) = PUP(from); if (len > 1) - *out++ = *from++; + PUP(out) = PUP(from); } } else { + unsigned short *sout; + unsigned long loops; + from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; - len -= 3; - } while (len > 2); - if (len) { - *out++ = *from++; - if (len > 1) - *out++ = *from++; - } + /* minimum length is three */ + /* Align out addr */ + if (!((long)(out - 1 + OFF) & 1)) { + PUP(out) = PUP(from); + len--; + } + sout = (unsigned short *)(out - OFF); + if (dist > 2 ) { + unsigned short *sfrom; + + sfrom = (unsigned short *)(from - OFF); + loops = len >> 1; + do + PUP(sout) = get_unaligned(++sfrom); + while (--loops); + out = (unsigned char *)sout + OFF; + from = (unsigned char *)sfrom + OFF; + } else { /* dist == 1 or dist == 2 */ + unsigned short pat16; + + pat16 = *(sout-2+2*OFF); + if (dist == 1) +#if defined(__BIG_ENDIAN) + pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8); +#elif defined(__LITTLE_ENDIAN) + pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8); +#else +#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined +#endif + loops = len >> 1; + do + PUP(sout) = pat16; + while (--loops); + out = (unsigned char *)sout + OFF; + } + if (len & 1) + PUP(out) = PUP(from); } } else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; + this = dcode[this.val + (hold & ((1U << op) - 1))]; goto dodist; } else { @@ -261,7 +307,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; + this = lcode[this.val + (hold & ((1U << op) - 1))]; goto dolen; } else if (op & 32) { /* end-of-block */ @@ -283,8 +329,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold &= (1U << bits) - 1; /* update state and return */ - strm->next_in = in; - strm->next_out = out; + strm->next_in = in + OFF; + strm->next_out = out + OFF; strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); strm->avail_out = (unsigned)(out < end ? 257 + (end - out) : 257 - (out - end)); @@ -297,7 +343,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - Using bit fields for code structure - Different op definition to avoid & for extra bits (do & for table bits) - - Three separate decoding do-loops for direct, window, and wnext == 0 + - Three separate decoding do-loops for direct, window, and write == 0 - Special case for distance > 1 copies to do overlapped load and store copy - Explicit branch predictions (based on measured branch probabilities) - Deferring match copy and interspersed it with decoding subsequent codes |