summaryrefslogtreecommitdiff
path: root/lib/zlib/inffast.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/zlib/inffast.c')
-rw-r--r--lib/zlib/inffast.c176
1 files changed, 111 insertions, 65 deletions
diff --git a/lib/zlib/inffast.c b/lib/zlib/inffast.c
index 5e2a65ad4d2..e3c7f3b892b 100644
--- a/lib/zlib/inffast.c
+++ b/lib/zlib/inffast.c
@@ -1,5 +1,5 @@
/* inffast.c -- fast decoding
- * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
+ * Copyright (C) 1995-2004 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
@@ -12,6 +12,25 @@
#ifndef ASMINF
+/* Allow machine dependent optimization for post-increment or pre-increment.
+ Based on testing to date,
+ Pre-increment preferred for:
+ - PowerPC G3 (Adler)
+ - MIPS R5000 (Randers-Pehrson)
+ Post-increment preferred for:
+ - none
+ No measurable difference:
+ - Pentium III (Anderson)
+ - M68060 (Nikl)
+ */
+#ifdef POSTINC
+# define OFF 0
+# define PUP(a) *(a)++
+#else
+# define OFF 1
+# define PUP(a) *++(a)
+#endif
+
/*
Decode literal, length, and distance codes and write out the resulting
literal and match bytes until either not enough input or output is
@@ -47,13 +66,12 @@
requires strm->avail_out >= 258 for each loop to avoid checking for
output space.
*/
-void ZLIB_INTERNAL inflate_fast(strm, start)
-z_streamp strm;
-unsigned start; /* inflate()'s starting value for strm->avail_out */
+void inflate_fast(z_streamp strm, unsigned start)
+/* start: inflate()'s starting value for strm->avail_out */
{
struct inflate_state FAR *state;
- z_const unsigned char FAR *in; /* local strm->next_in */
- z_const unsigned char FAR *last; /* have enough input while in < last */
+ unsigned char FAR *in; /* local strm->next_in */
+ unsigned char FAR *last; /* while in < last, enough input available */
unsigned char FAR *out; /* local strm->next_out */
unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
unsigned char FAR *end; /* while out < end, enough space available */
@@ -62,7 +80,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
#endif
unsigned wsize; /* window size or zero if not using window */
unsigned whave; /* valid bytes in the window */
- unsigned wnext; /* window write index */
+ unsigned write; /* window write index */
unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
unsigned long hold; /* local strm->hold */
unsigned bits; /* local strm->bits */
@@ -70,7 +88,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
code const FAR *dcode; /* local strm->distcode */
unsigned lmask; /* mask for first level of length codes */
unsigned dmask; /* mask for first level of distance codes */
- code here; /* retrieved table entry */
+ code this; /* retrieved table entry */
unsigned op; /* code bits, operation, extra bits, or */
/* window position, window bytes to copy */
unsigned len; /* match length, unused bytes */
@@ -79,7 +97,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
/* copy state to local variables */
state = (struct inflate_state FAR *)strm->state;
- in = strm->next_in;
+ in = strm->next_in - OFF;
last = in + (strm->avail_in - 5);
if (in > last && strm->avail_in > 5) {
/*
@@ -89,7 +107,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
strm->avail_in = 0xffffffff - (uintptr_t)in;
last = in + (strm->avail_in - 5);
}
- out = strm->next_out;
+ out = strm->next_out - OFF;
beg = out - (start - strm->avail_out);
end = out + (strm->avail_out - 257);
#ifdef INFLATE_STRICT
@@ -97,7 +115,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
#endif
wsize = state->wsize;
whave = state->whave;
- wnext = state->wnext;
+ write = state->write;
window = state->window;
hold = state->hold;
bits = state->bits;
@@ -110,29 +128,29 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
input data or output space */
do {
if (bits < 15) {
- hold += (unsigned long)(*in++) << bits;
+ hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
- hold += (unsigned long)(*in++) << bits;
+ hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
}
- here = lcode[hold & lmask];
+ this = lcode[hold & lmask];
dolen:
- op = (unsigned)(here.bits);
+ op = (unsigned)(this.bits);
hold >>= op;
bits -= op;
- op = (unsigned)(here.op);
+ op = (unsigned)(this.op);
if (op == 0) { /* literal */
- Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+ Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
"inflate: literal '%c'\n" :
- "inflate: literal 0x%02x\n", here.val));
- *out++ = (unsigned char)(here.val);
+ "inflate: literal 0x%02x\n", this.val));
+ PUP(out) = (unsigned char)(this.val);
}
else if (op & 16) { /* length base */
- len = (unsigned)(here.val);
+ len = (unsigned)(this.val);
op &= 15; /* number of extra bits */
if (op) {
if (bits < op) {
- hold += (unsigned long)(*in++) << bits;
+ hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
}
len += (unsigned)hold & ((1U << op) - 1);
@@ -141,25 +159,25 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
}
Tracevv((stderr, "inflate: length %u\n", len));
if (bits < 15) {
- hold += (unsigned long)(*in++) << bits;
+ hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
- hold += (unsigned long)(*in++) << bits;
+ hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
}
- here = dcode[hold & dmask];
+ this = dcode[hold & dmask];
dodist:
- op = (unsigned)(here.bits);
+ op = (unsigned)(this.bits);
hold >>= op;
bits -= op;
- op = (unsigned)(here.op);
+ op = (unsigned)(this.op);
if (op & 16) { /* distance base */
- dist = (unsigned)(here.val);
+ dist = (unsigned)(this.val);
op &= 15; /* number of extra bits */
if (bits < op) {
- hold += (unsigned long)(*in++) << bits;
+ hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
if (bits < op) {
- hold += (unsigned long)(*in++) << bits;
+ hold += (unsigned long)(PUP(in)) << bits;
bits += 8;
}
}
@@ -178,80 +196,108 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
if (dist > op) { /* see if copy from window */
op = dist - op; /* distance back in window */
if (op > whave) {
- strm->msg =
- (char *)"invalid distance too far back";
+ strm->msg = (char *)"invalid distance too far back";
state->mode = BAD;
break;
}
- from = window;
- if (wnext == 0) { /* very common case */
+ from = window - OFF;
+ if (write == 0) { /* very common case */
from += wsize - op;
if (op < len) { /* some from window */
len -= op;
do {
- *out++ = *from++;
+ PUP(out) = PUP(from);
} while (--op);
from = out - dist; /* rest from output */
}
}
- else if (wnext < op) { /* wrap around window */
- from += wsize + wnext - op;
- op -= wnext;
+ else if (write < op) { /* wrap around window */
+ from += wsize + write - op;
+ op -= write;
if (op < len) { /* some from end of window */
len -= op;
do {
- *out++ = *from++;
+ PUP(out) = PUP(from);
} while (--op);
- from = window;
- if (wnext < len) { /* some from start of window */
- op = wnext;
+ from = window - OFF;
+ if (write < len) { /* some from start of window */
+ op = write;
len -= op;
do {
- *out++ = *from++;
+ PUP(out) = PUP(from);
} while (--op);
from = out - dist; /* rest from output */
}
}
}
else { /* contiguous in window */
- from += wnext - op;
+ from += write - op;
if (op < len) { /* some from window */
len -= op;
do {
- *out++ = *from++;
+ PUP(out) = PUP(from);
} while (--op);
from = out - dist; /* rest from output */
}
}
while (len > 2) {
- *out++ = *from++;
- *out++ = *from++;
- *out++ = *from++;
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
len -= 3;
}
if (len) {
- *out++ = *from++;
+ PUP(out) = PUP(from);
if (len > 1)
- *out++ = *from++;
+ PUP(out) = PUP(from);
}
}
else {
+ unsigned short *sout;
+ unsigned long loops;
+
from = out - dist; /* copy direct from output */
- do { /* minimum length is three */
- *out++ = *from++;
- *out++ = *from++;
- *out++ = *from++;
- len -= 3;
- } while (len > 2);
- if (len) {
- *out++ = *from++;
- if (len > 1)
- *out++ = *from++;
- }
+ /* minimum length is three */
+ /* Align out addr */
+ if (!((long)(out - 1 + OFF) & 1)) {
+ PUP(out) = PUP(from);
+ len--;
+ }
+ sout = (unsigned short *)(out - OFF);
+ if (dist > 2 ) {
+ unsigned short *sfrom;
+
+ sfrom = (unsigned short *)(from - OFF);
+ loops = len >> 1;
+ do
+ PUP(sout) = get_unaligned(++sfrom);
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ from = (unsigned char *)sfrom + OFF;
+ } else { /* dist == 1 or dist == 2 */
+ unsigned short pat16;
+
+ pat16 = *(sout-2+2*OFF);
+ if (dist == 1)
+#if defined(__BIG_ENDIAN)
+ pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
+#elif defined(__LITTLE_ENDIAN)
+ pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
+#else
+#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
+#endif
+ loops = len >> 1;
+ do
+ PUP(sout) = pat16;
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ }
+ if (len & 1)
+ PUP(out) = PUP(from);
}
}
else if ((op & 64) == 0) { /* 2nd level distance code */
- here = dcode[here.val + (hold & ((1U << op) - 1))];
+ this = dcode[this.val + (hold & ((1U << op) - 1))];
goto dodist;
}
else {
@@ -261,7 +307,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
}
}
else if ((op & 64) == 0) { /* 2nd level length code */
- here = lcode[here.val + (hold & ((1U << op) - 1))];
+ this = lcode[this.val + (hold & ((1U << op) - 1))];
goto dolen;
}
else if (op & 32) { /* end-of-block */
@@ -283,8 +329,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
hold &= (1U << bits) - 1;
/* update state and return */
- strm->next_in = in;
- strm->next_out = out;
+ strm->next_in = in + OFF;
+ strm->next_out = out + OFF;
strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
strm->avail_out = (unsigned)(out < end ?
257 + (end - out) : 257 - (out - end));
@@ -297,7 +343,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
- Using bit fields for code structure
- Different op definition to avoid & for extra bits (do & for table bits)
- - Three separate decoding do-loops for direct, window, and wnext == 0
+ - Three separate decoding do-loops for direct, window, and write == 0
- Special case for distance > 1 copies to do overlapped load and store copy
- Explicit branch predictions (based on measured branch probabilities)
- Deferring match copy and interspersed it with decoding subsequent codes