source: trunk/third/zlib/trees.c @ 17315

Revision 17315, 42.7 KB checked in by zacheiss, 23 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r17314, which included commits to RCS files with non-trunk default branches.
RevLine 
[15210]1/* trees.c -- output deflated data using Huffman coding
[17314]2 * Copyright (C) 1995-2002 Jean-loup Gailly
[15210]3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6/*
7 *  ALGORITHM
8 *
9 *      The "deflation" process uses several Huffman trees. The more
10 *      common source values are represented by shorter bit sequences.
11 *
12 *      Each code tree is stored in a compressed form which is itself
13 * a Huffman encoding of the lengths of all the code strings (in
14 * ascending order by source values).  The actual code strings are
15 * reconstructed from the lengths in the inflate process, as described
16 * in the deflate specification.
17 *
18 *  REFERENCES
19 *
20 *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
21 *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
22 *
23 *      Storer, James A.
24 *          Data Compression:  Methods and Theory, pp. 49-50.
25 *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
26 *
27 *      Sedgewick, R.
28 *          Algorithms, p290.
29 *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
30 */
31
[17314]32/* @(#) $Id: trees.c,v 1.1.1.2 2002-03-12 17:19:03 zacheiss Exp $ */
[15210]33
34/* #define GEN_TREES_H */
35
36#include "deflate.h"
37
38#ifdef DEBUG
39#  include <ctype.h>
40#endif
41
42/* ===========================================================================
43 * Constants
44 */
45
46#define MAX_BL_BITS 7
47/* Bit length codes must not exceed MAX_BL_BITS bits */
48
49#define END_BLOCK 256
50/* end of block literal code */
51
52#define REP_3_6      16
53/* repeat previous bit length 3-6 times (2 bits of repeat count) */
54
55#define REPZ_3_10    17
56/* repeat a zero length 3-10 times  (3 bits of repeat count) */
57
58#define REPZ_11_138  18
59/* repeat a zero length 11-138 times  (7 bits of repeat count) */
60
61local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
62   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
63
64local const int extra_dbits[D_CODES] /* extra bits for each distance code */
65   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
66
67local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
68   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
69
70local const uch bl_order[BL_CODES]
71   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
72/* The lengths of the bit length codes are sent in order of decreasing
73 * probability, to avoid transmitting the lengths for unused bit length codes.
74 */
75
76#define Buf_size (8 * 2*sizeof(char))
77/* Number of bits used within bi_buf. (bi_buf might be implemented on
78 * more than 16 bits on some systems.)
79 */
80
81/* ===========================================================================
82 * Local data. These are initialized only once.
83 */
84
85#define DIST_CODE_LEN  512 /* see definition of array dist_code below */
86
87#if defined(GEN_TREES_H) || !defined(STDC)
88/* non ANSI compilers may not accept trees.h */
89
90local ct_data static_ltree[L_CODES+2];
91/* The static literal tree. Since the bit lengths are imposed, there is no
92 * need for the L_CODES extra codes used during heap construction. However
93 * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
94 * below).
95 */
96
97local ct_data static_dtree[D_CODES];
98/* The static distance tree. (Actually a trivial tree since all codes use
99 * 5 bits.)
100 */
101
102uch _dist_code[DIST_CODE_LEN];
103/* Distance codes. The first 256 values correspond to the distances
104 * 3 .. 258, the last 256 values correspond to the top 8 bits of
105 * the 15 bit distances.
106 */
107
108uch _length_code[MAX_MATCH-MIN_MATCH+1];
109/* length code for each normalized match length (0 == MIN_MATCH) */
110
111local int base_length[LENGTH_CODES];
112/* First normalized length for each code (0 = MIN_MATCH) */
113
114local int base_dist[D_CODES];
115/* First normalized distance for each code (0 = distance of 1) */
116
117#else
118#  include "trees.h"
119#endif /* GEN_TREES_H */
120
121struct static_tree_desc_s {
122    const ct_data *static_tree;  /* static tree or NULL */
123    const intf *extra_bits;      /* extra bits for each code or NULL */
124    int     extra_base;          /* base index for extra_bits */
125    int     elems;               /* max number of elements in the tree */
126    int     max_length;          /* max bit length for the codes */
127};
128
129local static_tree_desc  static_l_desc =
130{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
131
132local static_tree_desc  static_d_desc =
133{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
134
135local static_tree_desc  static_bl_desc =
136{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
137
138/* ===========================================================================
139 * Local (static) routines in this file.
140 */
141
142local void tr_static_init OF((void));
143local void init_block     OF((deflate_state *s));
144local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
145local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
146local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
147local void build_tree     OF((deflate_state *s, tree_desc *desc));
148local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
149local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
150local int  build_bl_tree  OF((deflate_state *s));
151local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
152                              int blcodes));
153local void compress_block OF((deflate_state *s, ct_data *ltree,
154                              ct_data *dtree));
155local void set_data_type  OF((deflate_state *s));
156local unsigned bi_reverse OF((unsigned value, int length));
157local void bi_windup      OF((deflate_state *s));
158local void bi_flush       OF((deflate_state *s));
159local void copy_block     OF((deflate_state *s, charf *buf, unsigned len,
160                              int header));
161
162#ifdef GEN_TREES_H
163local void gen_trees_header OF((void));
164#endif
165
166#ifndef DEBUG
167#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
168   /* Send a code of the given tree. c and tree must not have side effects */
169
170#else /* DEBUG */
171#  define send_code(s, c, tree) \
172     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
173       send_bits(s, tree[c].Code, tree[c].Len); }
174#endif
175
176/* ===========================================================================
177 * Output a short LSB first on the stream.
178 * IN assertion: there is enough room in pendingBuf.
179 */
180#define put_short(s, w) { \
181    put_byte(s, (uch)((w) & 0xff)); \
182    put_byte(s, (uch)((ush)(w) >> 8)); \
183}
184
185/* ===========================================================================
186 * Send a value on a given number of bits.
187 * IN assertion: length <= 16 and value fits in length bits.
188 */
189#ifdef DEBUG
190local void send_bits      OF((deflate_state *s, int value, int length));
191
192local void send_bits(s, value, length)
193    deflate_state *s;
194    int value;  /* value to send */
195    int length; /* number of bits */
196{
197    Tracevv((stderr," l %2d v %4x ", length, value));
198    Assert(length > 0 && length <= 15, "invalid length");
199    s->bits_sent += (ulg)length;
200
201    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
202     * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
203     * unused bits in value.
204     */
205    if (s->bi_valid > (int)Buf_size - length) {
206        s->bi_buf |= (value << s->bi_valid);
207        put_short(s, s->bi_buf);
208        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
209        s->bi_valid += length - Buf_size;
210    } else {
211        s->bi_buf |= value << s->bi_valid;
212        s->bi_valid += length;
213    }
214}
215#else /* !DEBUG */
216
217#define send_bits(s, value, length) \
218{ int len = length;\
219  if (s->bi_valid > (int)Buf_size - len) {\
220    int val = value;\
221    s->bi_buf |= (val << s->bi_valid);\
222    put_short(s, s->bi_buf);\
223    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
224    s->bi_valid += len - Buf_size;\
225  } else {\
226    s->bi_buf |= (value) << s->bi_valid;\
227    s->bi_valid += len;\
228  }\
229}
230#endif /* DEBUG */
231
232
233#define MAX(a,b) (a >= b ? a : b)
234/* the arguments must not have side effects */
235
236/* ===========================================================================
237 * Initialize the various 'constant' tables.
238 */
239local void tr_static_init()
240{
241#if defined(GEN_TREES_H) || !defined(STDC)
242    static int static_init_done = 0;
243    int n;        /* iterates over tree elements */
244    int bits;     /* bit counter */
245    int length;   /* length value */
246    int code;     /* code value */
247    int dist;     /* distance index */
248    ush bl_count[MAX_BITS+1];
249    /* number of codes at each bit length for an optimal tree */
250
251    if (static_init_done) return;
252
253    /* For some embedded targets, global variables are not initialized: */
254    static_l_desc.static_tree = static_ltree;
255    static_l_desc.extra_bits = extra_lbits;
256    static_d_desc.static_tree = static_dtree;
257    static_d_desc.extra_bits = extra_dbits;
258    static_bl_desc.extra_bits = extra_blbits;
259
260    /* Initialize the mapping length (0..255) -> length code (0..28) */
261    length = 0;
262    for (code = 0; code < LENGTH_CODES-1; code++) {
263        base_length[code] = length;
264        for (n = 0; n < (1<<extra_lbits[code]); n++) {
265            _length_code[length++] = (uch)code;
266        }
267    }
268    Assert (length == 256, "tr_static_init: length != 256");
269    /* Note that the length 255 (match length 258) can be represented
270     * in two different ways: code 284 + 5 bits or code 285, so we
271     * overwrite length_code[255] to use the best encoding:
272     */
273    _length_code[length-1] = (uch)code;
274
275    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
276    dist = 0;
277    for (code = 0 ; code < 16; code++) {
278        base_dist[code] = dist;
279        for (n = 0; n < (1<<extra_dbits[code]); n++) {
280            _dist_code[dist++] = (uch)code;
281        }
282    }
283    Assert (dist == 256, "tr_static_init: dist != 256");
284    dist >>= 7; /* from now on, all distances are divided by 128 */
285    for ( ; code < D_CODES; code++) {
286        base_dist[code] = dist << 7;
287        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
288            _dist_code[256 + dist++] = (uch)code;
289        }
290    }
291    Assert (dist == 256, "tr_static_init: 256+dist != 512");
292
293    /* Construct the codes of the static literal tree */
294    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
295    n = 0;
296    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
297    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
298    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
299    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
300    /* Codes 286 and 287 do not exist, but we must include them in the
301     * tree construction to get a canonical Huffman tree (longest code
302     * all ones)
303     */
304    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
305
306    /* The static distance tree is trivial: */
307    for (n = 0; n < D_CODES; n++) {
308        static_dtree[n].Len = 5;
309        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
310    }
311    static_init_done = 1;
312
313#  ifdef GEN_TREES_H
314    gen_trees_header();
315#  endif
316#endif /* defined(GEN_TREES_H) || !defined(STDC) */
317}
318
319/* ===========================================================================
320 * Genererate the file trees.h describing the static trees.
321 */
322#ifdef GEN_TREES_H
323#  ifndef DEBUG
324#    include <stdio.h>
325#  endif
326
327#  define SEPARATOR(i, last, width) \
328      ((i) == (last)? "\n};\n\n" :    \
329       ((i) % (width) == (width)-1 ? ",\n" : ", "))
330
331void gen_trees_header()
332{
333    FILE *header = fopen("trees.h", "w");
334    int i;
335
336    Assert (header != NULL, "Can't open trees.h");
337    fprintf(header,
338            "/* header created automatically with -DGEN_TREES_H */\n\n");
339
340    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
341    for (i = 0; i < L_CODES+2; i++) {
342        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
343                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
344    }
345
346    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
347    for (i = 0; i < D_CODES; i++) {
348        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
349                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
350    }
351
352    fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
353    for (i = 0; i < DIST_CODE_LEN; i++) {
354        fprintf(header, "%2u%s", _dist_code[i],
355                SEPARATOR(i, DIST_CODE_LEN-1, 20));
356    }
357
358    fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
359    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
360        fprintf(header, "%2u%s", _length_code[i],
361                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
362    }
363
364    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
365    for (i = 0; i < LENGTH_CODES; i++) {
366        fprintf(header, "%1u%s", base_length[i],
367                SEPARATOR(i, LENGTH_CODES-1, 20));
368    }
369
370    fprintf(header, "local const int base_dist[D_CODES] = {\n");
371    for (i = 0; i < D_CODES; i++) {
372        fprintf(header, "%5u%s", base_dist[i],
373                SEPARATOR(i, D_CODES-1, 10));
374    }
375
376    fclose(header);
377}
378#endif /* GEN_TREES_H */
379
380/* ===========================================================================
381 * Initialize the tree data structures for a new zlib stream.
382 */
383void _tr_init(s)
384    deflate_state *s;
385{
386    tr_static_init();
387
388    s->l_desc.dyn_tree = s->dyn_ltree;
389    s->l_desc.stat_desc = &static_l_desc;
390
391    s->d_desc.dyn_tree = s->dyn_dtree;
392    s->d_desc.stat_desc = &static_d_desc;
393
394    s->bl_desc.dyn_tree = s->bl_tree;
395    s->bl_desc.stat_desc = &static_bl_desc;
396
397    s->bi_buf = 0;
398    s->bi_valid = 0;
399    s->last_eob_len = 8; /* enough lookahead for inflate */
400#ifdef DEBUG
401    s->compressed_len = 0L;
402    s->bits_sent = 0L;
403#endif
404
405    /* Initialize the first block of the first file: */
406    init_block(s);
407}
408
409/* ===========================================================================
410 * Initialize a new block.
411 */
412local void init_block(s)
413    deflate_state *s;
414{
415    int n; /* iterates over tree elements */
416
417    /* Initialize the trees. */
418    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
419    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
420    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
421
422    s->dyn_ltree[END_BLOCK].Freq = 1;
423    s->opt_len = s->static_len = 0L;
424    s->last_lit = s->matches = 0;
425}
426
427#define SMALLEST 1
428/* Index within the heap array of least frequent node in the Huffman tree */
429
430
431/* ===========================================================================
432 * Remove the smallest element from the heap and recreate the heap with
433 * one less element. Updates heap and heap_len.
434 */
435#define pqremove(s, tree, top) \
436{\
437    top = s->heap[SMALLEST]; \
438    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
439    pqdownheap(s, tree, SMALLEST); \
440}
441
442/* ===========================================================================
443 * Compares to subtrees, using the tree depth as tie breaker when
444 * the subtrees have equal frequency. This minimizes the worst case length.
445 */
446#define smaller(tree, n, m, depth) \
447   (tree[n].Freq < tree[m].Freq || \
448   (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
449
450/* ===========================================================================
451 * Restore the heap property by moving down the tree starting at node k,
452 * exchanging a node with the smallest of its two sons if necessary, stopping
453 * when the heap property is re-established (each father smaller than its
454 * two sons).
455 */
456local void pqdownheap(s, tree, k)
457    deflate_state *s;
458    ct_data *tree;  /* the tree to restore */
459    int k;               /* node to move down */
460{
461    int v = s->heap[k];
462    int j = k << 1;  /* left son of k */
463    while (j <= s->heap_len) {
464        /* Set j to the smallest of the two sons: */
465        if (j < s->heap_len &&
466            smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
467            j++;
468        }
469        /* Exit if v is smaller than both sons */
470        if (smaller(tree, v, s->heap[j], s->depth)) break;
471
472        /* Exchange v with the smallest son */
473        s->heap[k] = s->heap[j];  k = j;
474
475        /* And continue down the tree, setting j to the left son of k */
476        j <<= 1;
477    }
478    s->heap[k] = v;
479}
480
481/* ===========================================================================
482 * Compute the optimal bit lengths for a tree and update the total bit length
483 * for the current block.
484 * IN assertion: the fields freq and dad are set, heap[heap_max] and
485 *    above are the tree nodes sorted by increasing frequency.
486 * OUT assertions: the field len is set to the optimal bit length, the
487 *     array bl_count contains the frequencies for each bit length.
488 *     The length opt_len is updated; static_len is also updated if stree is
489 *     not null.
490 */
491local void gen_bitlen(s, desc)
492    deflate_state *s;
493    tree_desc *desc;    /* the tree descriptor */
494{
495    ct_data *tree        = desc->dyn_tree;
496    int max_code         = desc->max_code;
497    const ct_data *stree = desc->stat_desc->static_tree;
498    const intf *extra    = desc->stat_desc->extra_bits;
499    int base             = desc->stat_desc->extra_base;
500    int max_length       = desc->stat_desc->max_length;
501    int h;              /* heap index */
502    int n, m;           /* iterate over the tree elements */
503    int bits;           /* bit length */
504    int xbits;          /* extra bits */
505    ush f;              /* frequency */
506    int overflow = 0;   /* number of elements with bit length too large */
507
508    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
509
510    /* In a first pass, compute the optimal bit lengths (which may
511     * overflow in the case of the bit length tree).
512     */
513    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
514
515    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
516        n = s->heap[h];
517        bits = tree[tree[n].Dad].Len + 1;
518        if (bits > max_length) bits = max_length, overflow++;
519        tree[n].Len = (ush)bits;
520        /* We overwrite tree[n].Dad which is no longer needed */
521
522        if (n > max_code) continue; /* not a leaf node */
523
524        s->bl_count[bits]++;
525        xbits = 0;
526        if (n >= base) xbits = extra[n-base];
527        f = tree[n].Freq;
528        s->opt_len += (ulg)f * (bits + xbits);
529        if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
530    }
531    if (overflow == 0) return;
532
533    Trace((stderr,"\nbit length overflow\n"));
534    /* This happens for example on obj2 and pic of the Calgary corpus */
535
536    /* Find the first bit length which could increase: */
537    do {
538        bits = max_length-1;
539        while (s->bl_count[bits] == 0) bits--;
540        s->bl_count[bits]--;      /* move one leaf down the tree */
541        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
542        s->bl_count[max_length]--;
543        /* The brother of the overflow item also moves one step up,
544         * but this does not affect bl_count[max_length]
545         */
546        overflow -= 2;
547    } while (overflow > 0);
548
549    /* Now recompute all bit lengths, scanning in increasing frequency.
550     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
551     * lengths instead of fixing only the wrong ones. This idea is taken
552     * from 'ar' written by Haruhiko Okumura.)
553     */
554    for (bits = max_length; bits != 0; bits--) {
555        n = s->bl_count[bits];
556        while (n != 0) {
557            m = s->heap[--h];
558            if (m > max_code) continue;
559            if (tree[m].Len != (unsigned) bits) {
560                Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
561                s->opt_len += ((long)bits - (long)tree[m].Len)
562                              *(long)tree[m].Freq;
563                tree[m].Len = (ush)bits;
564            }
565            n--;
566        }
567    }
568}
569
570/* ===========================================================================
571 * Generate the codes for a given tree and bit counts (which need not be
572 * optimal).
573 * IN assertion: the array bl_count contains the bit length statistics for
574 * the given tree and the field len is set for all tree elements.
575 * OUT assertion: the field code is set for all tree elements of non
576 *     zero code length.
577 */
578local void gen_codes (tree, max_code, bl_count)
579    ct_data *tree;             /* the tree to decorate */
580    int max_code;              /* largest code with non zero frequency */
581    ushf *bl_count;            /* number of codes at each bit length */
582{
583    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
584    ush code = 0;              /* running code value */
585    int bits;                  /* bit index */
586    int n;                     /* code index */
587
588    /* The distribution counts are first used to generate the code values
589     * without bit reversal.
590     */
591    for (bits = 1; bits <= MAX_BITS; bits++) {
592        next_code[bits] = code = (code + bl_count[bits-1]) << 1;
593    }
594    /* Check that the bit counts in bl_count are consistent. The last code
595     * must be all ones.
596     */
597    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
598            "inconsistent bit counts");
599    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
600
601    for (n = 0;  n <= max_code; n++) {
602        int len = tree[n].Len;
603        if (len == 0) continue;
604        /* Now reverse the bits */
605        tree[n].Code = bi_reverse(next_code[len]++, len);
606
607        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
608             n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
609    }
610}
611
612/* ===========================================================================
613 * Construct one Huffman tree and assigns the code bit strings and lengths.
614 * Update the total bit length for the current block.
615 * IN assertion: the field freq is set for all tree elements.
616 * OUT assertions: the fields len and code are set to the optimal bit length
617 *     and corresponding code. The length opt_len is updated; static_len is
618 *     also updated if stree is not null. The field max_code is set.
619 */
620local void build_tree(s, desc)
621    deflate_state *s;
622    tree_desc *desc; /* the tree descriptor */
623{
624    ct_data *tree         = desc->dyn_tree;
625    const ct_data *stree  = desc->stat_desc->static_tree;
626    int elems             = desc->stat_desc->elems;
627    int n, m;          /* iterate over heap elements */
628    int max_code = -1; /* largest code with non zero frequency */
629    int node;          /* new node being created */
630
631    /* Construct the initial heap, with least frequent element in
632     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
633     * heap[0] is not used.
634     */
635    s->heap_len = 0, s->heap_max = HEAP_SIZE;
636
637    for (n = 0; n < elems; n++) {
638        if (tree[n].Freq != 0) {
639            s->heap[++(s->heap_len)] = max_code = n;
640            s->depth[n] = 0;
641        } else {
642            tree[n].Len = 0;
643        }
644    }
645
646    /* The pkzip format requires that at least one distance code exists,
647     * and that at least one bit should be sent even if there is only one
648     * possible code. So to avoid special checks later on we force at least
649     * two codes of non zero frequency.
650     */
651    while (s->heap_len < 2) {
652        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
653        tree[node].Freq = 1;
654        s->depth[node] = 0;
655        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
656        /* node is 0 or 1 so it does not have extra bits */
657    }
658    desc->max_code = max_code;
659
660    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
661     * establish sub-heaps of increasing lengths:
662     */
663    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
664
665    /* Construct the Huffman tree by repeatedly combining the least two
666     * frequent nodes.
667     */
668    node = elems;              /* next internal node of the tree */
669    do {
670        pqremove(s, tree, n);  /* n = node of least frequency */
671        m = s->heap[SMALLEST]; /* m = node of next least frequency */
672
673        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
674        s->heap[--(s->heap_max)] = m;
675
676        /* Create a new node father of n and m */
677        tree[node].Freq = tree[n].Freq + tree[m].Freq;
678        s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1);
679        tree[n].Dad = tree[m].Dad = (ush)node;
680#ifdef DUMP_BL_TREE
681        if (tree == s->bl_tree) {
682            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
683                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
684        }
685#endif
686        /* and insert the new node in the heap */
687        s->heap[SMALLEST] = node++;
688        pqdownheap(s, tree, SMALLEST);
689
690    } while (s->heap_len >= 2);
691
692    s->heap[--(s->heap_max)] = s->heap[SMALLEST];
693
694    /* At this point, the fields freq and dad are set. We can now
695     * generate the bit lengths.
696     */
697    gen_bitlen(s, (tree_desc *)desc);
698
699    /* The field len is now set, we can generate the bit codes */
700    gen_codes ((ct_data *)tree, max_code, s->bl_count);
701}
702
703/* ===========================================================================
704 * Scan a literal or distance tree to determine the frequencies of the codes
705 * in the bit length tree.
706 */
707local void scan_tree (s, tree, max_code)
708    deflate_state *s;
709    ct_data *tree;   /* the tree to be scanned */
710    int max_code;    /* and its largest code of non zero frequency */
711{
712    int n;                     /* iterates over all tree elements */
713    int prevlen = -1;          /* last emitted length */
714    int curlen;                /* length of current code */
715    int nextlen = tree[0].Len; /* length of next code */
716    int count = 0;             /* repeat count of the current code */
717    int max_count = 7;         /* max repeat count */
718    int min_count = 4;         /* min repeat count */
719
720    if (nextlen == 0) max_count = 138, min_count = 3;
721    tree[max_code+1].Len = (ush)0xffff; /* guard */
722
723    for (n = 0; n <= max_code; n++) {
724        curlen = nextlen; nextlen = tree[n+1].Len;
725        if (++count < max_count && curlen == nextlen) {
726            continue;
727        } else if (count < min_count) {
728            s->bl_tree[curlen].Freq += count;
729        } else if (curlen != 0) {
730            if (curlen != prevlen) s->bl_tree[curlen].Freq++;
731            s->bl_tree[REP_3_6].Freq++;
732        } else if (count <= 10) {
733            s->bl_tree[REPZ_3_10].Freq++;
734        } else {
735            s->bl_tree[REPZ_11_138].Freq++;
736        }
737        count = 0; prevlen = curlen;
738        if (nextlen == 0) {
739            max_count = 138, min_count = 3;
740        } else if (curlen == nextlen) {
741            max_count = 6, min_count = 3;
742        } else {
743            max_count = 7, min_count = 4;
744        }
745    }
746}
747
748/* ===========================================================================
749 * Send a literal or distance tree in compressed form, using the codes in
750 * bl_tree.
751 */
752local void send_tree (s, tree, max_code)
753    deflate_state *s;
754    ct_data *tree; /* the tree to be scanned */
755    int max_code;       /* and its largest code of non zero frequency */
756{
757    int n;                     /* iterates over all tree elements */
758    int prevlen = -1;          /* last emitted length */
759    int curlen;                /* length of current code */
760    int nextlen = tree[0].Len; /* length of next code */
761    int count = 0;             /* repeat count of the current code */
762    int max_count = 7;         /* max repeat count */
763    int min_count = 4;         /* min repeat count */
764
765    /* tree[max_code+1].Len = -1; */  /* guard already set */
766    if (nextlen == 0) max_count = 138, min_count = 3;
767
768    for (n = 0; n <= max_code; n++) {
769        curlen = nextlen; nextlen = tree[n+1].Len;
770        if (++count < max_count && curlen == nextlen) {
771            continue;
772        } else if (count < min_count) {
773            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
774
775        } else if (curlen != 0) {
776            if (curlen != prevlen) {
777                send_code(s, curlen, s->bl_tree); count--;
778            }
779            Assert(count >= 3 && count <= 6, " 3_6?");
780            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
781
782        } else if (count <= 10) {
783            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
784
785        } else {
786            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
787        }
788        count = 0; prevlen = curlen;
789        if (nextlen == 0) {
790            max_count = 138, min_count = 3;
791        } else if (curlen == nextlen) {
792            max_count = 6, min_count = 3;
793        } else {
794            max_count = 7, min_count = 4;
795        }
796    }
797}
798
799/* ===========================================================================
800 * Construct the Huffman tree for the bit lengths and return the index in
801 * bl_order of the last bit length code to send.
802 */
803local int build_bl_tree(s)
804    deflate_state *s;
805{
806    int max_blindex;  /* index of last bit length code of non zero freq */
807
808    /* Determine the bit length frequencies for literal and distance trees */
809    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
810    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
811
812    /* Build the bit length tree: */
813    build_tree(s, (tree_desc *)(&(s->bl_desc)));
814    /* opt_len now includes the length of the tree representations, except
815     * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
816     */
817
818    /* Determine the number of bit length codes to send. The pkzip format
819     * requires that at least 4 bit length codes be sent. (appnote.txt says
820     * 3 but the actual value used is 4.)
821     */
822    for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
823        if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
824    }
825    /* Update opt_len to include the bit length tree and counts */
826    s->opt_len += 3*(max_blindex+1) + 5+5+4;
827    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
828            s->opt_len, s->static_len));
829
830    return max_blindex;
831}
832
833/* ===========================================================================
834 * Send the header for a block using dynamic Huffman trees: the counts, the
835 * lengths of the bit length codes, the literal tree and the distance tree.
836 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
837 */
838local void send_all_trees(s, lcodes, dcodes, blcodes)
839    deflate_state *s;
840    int lcodes, dcodes, blcodes; /* number of codes for each tree */
841{
842    int rank;                    /* index in bl_order */
843
844    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
845    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
846            "too many codes");
847    Tracev((stderr, "\nbl counts: "));
848    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
849    send_bits(s, dcodes-1,   5);
850    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
851    for (rank = 0; rank < blcodes; rank++) {
852        Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
853        send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
854    }
855    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
856
857    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
858    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
859
860    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
861    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
862}
863
864/* ===========================================================================
865 * Send a stored block
866 */
867void _tr_stored_block(s, buf, stored_len, eof)
868    deflate_state *s;
869    charf *buf;       /* input block */
870    ulg stored_len;   /* length of input block */
871    int eof;          /* true if this is the last block for a file */
872{
873    send_bits(s, (STORED_BLOCK<<1)+eof, 3);  /* send block type */
874#ifdef DEBUG
875    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
876    s->compressed_len += (stored_len + 4) << 3;
877#endif
878    copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
879}
880
881/* ===========================================================================
882 * Send one empty static block to give enough lookahead for inflate.
883 * This takes 10 bits, of which 7 may remain in the bit buffer.
884 * The current inflate code requires 9 bits of lookahead. If the
885 * last two codes for the previous block (real code plus EOB) were coded
886 * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
887 * the last real code. In this case we send two empty static blocks instead
888 * of one. (There are no problems if the previous block is stored or fixed.)
889 * To simplify the code, we assume the worst case of last real code encoded
890 * on one bit only.
891 */
892void _tr_align(s)
893    deflate_state *s;
894{
895    send_bits(s, STATIC_TREES<<1, 3);
896    send_code(s, END_BLOCK, static_ltree);
897#ifdef DEBUG
898    s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
899#endif
900    bi_flush(s);
901    /* Of the 10 bits for the empty block, we have already sent
902     * (10 - bi_valid) bits. The lookahead for the last real code (before
903     * the EOB of the previous block) was thus at least one plus the length
904     * of the EOB plus what we have just sent of the empty static block.
905     */
906    if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
907        send_bits(s, STATIC_TREES<<1, 3);
908        send_code(s, END_BLOCK, static_ltree);
909#ifdef DEBUG
910        s->compressed_len += 10L;
911#endif
912        bi_flush(s);
913    }
914    s->last_eob_len = 7;
915}
916
917/* ===========================================================================
918 * Determine the best encoding for the current block: dynamic trees, static
919 * trees or store, and output the encoded block to the zip file.
920 */
921void _tr_flush_block(s, buf, stored_len, eof)
922    deflate_state *s;
923    charf *buf;       /* input block, or NULL if too old */
924    ulg stored_len;   /* length of input block */
925    int eof;          /* true if this is the last block for a file */
926{
927    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
928    int max_blindex = 0;  /* index of last bit length code of non zero freq */
929
930    /* Build the Huffman trees unless a stored block is forced */
931    if (s->level > 0) {
932
933         /* Check if the file is ascii or binary */
934        if (s->data_type == Z_UNKNOWN) set_data_type(s);
935
936        /* Construct the literal and distance trees */
937        build_tree(s, (tree_desc *)(&(s->l_desc)));
938        Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
939                s->static_len));
940
941        build_tree(s, (tree_desc *)(&(s->d_desc)));
942        Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
943                s->static_len));
944        /* At this point, opt_len and static_len are the total bit lengths of
945         * the compressed block data, excluding the tree representations.
946         */
947
948        /* Build the bit length tree for the above two trees, and get the index
949         * in bl_order of the last bit length code to send.
950         */
951        max_blindex = build_bl_tree(s);
952
953        /* Determine the best encoding. Compute first the block length in bytes*/
954        opt_lenb = (s->opt_len+3+7)>>3;
955        static_lenb = (s->static_len+3+7)>>3;
956
957        Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
958                opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
959                s->last_lit));
960
961        if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
962
963    } else {
964        Assert(buf != (char*)0, "lost buf");
965        opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
966    }
967
968#ifdef FORCE_STORED
969    if (buf != (char*)0) { /* force stored block */
970#else
971    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
972                       /* 4: two words for the lengths */
973#endif
974        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
975         * Otherwise we can't have processed more than WSIZE input bytes since
976         * the last block flush, because compression would have been
977         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
978         * transform a block into a stored block.
979         */
980        _tr_stored_block(s, buf, stored_len, eof);
981
982#ifdef FORCE_STATIC
983    } else if (static_lenb >= 0) { /* force static trees */
984#else
985    } else if (static_lenb == opt_lenb) {
986#endif
987        send_bits(s, (STATIC_TREES<<1)+eof, 3);
988        compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
989#ifdef DEBUG
990        s->compressed_len += 3 + s->static_len;
991#endif
992    } else {
993        send_bits(s, (DYN_TREES<<1)+eof, 3);
994        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
995                       max_blindex+1);
996        compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
997#ifdef DEBUG
998        s->compressed_len += 3 + s->opt_len;
999#endif
1000    }
1001    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
1002    /* The above check is made mod 2^32, for files larger than 512 MB
1003     * and uLong implemented on 32 bits.
1004     */
1005    init_block(s);
1006
1007    if (eof) {
1008        bi_windup(s);
1009#ifdef DEBUG
1010        s->compressed_len += 7;  /* align on byte boundary */
1011#endif
1012    }
1013    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1014           s->compressed_len-7*eof));
1015}
1016
1017/* ===========================================================================
1018 * Save the match info and tally the frequency counts. Return true if
1019 * the current block must be flushed.
1020 */
1021int _tr_tally (s, dist, lc)
1022    deflate_state *s;
1023    unsigned dist;  /* distance of matched string */
1024    unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
1025{
1026    s->d_buf[s->last_lit] = (ush)dist;
1027    s->l_buf[s->last_lit++] = (uch)lc;
1028    if (dist == 0) {
1029        /* lc is the unmatched char */
1030        s->dyn_ltree[lc].Freq++;
1031    } else {
1032        s->matches++;
1033        /* Here, lc is the match length - MIN_MATCH */
1034        dist--;             /* dist = match distance - 1 */
1035        Assert((ush)dist < (ush)MAX_DIST(s) &&
1036               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1037               (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
1038
1039        s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
1040        s->dyn_dtree[d_code(dist)].Freq++;
1041    }
1042
1043#ifdef TRUNCATE_BLOCK
1044    /* Try to guess if it is profitable to stop the current block here */
1045    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
1046        /* Compute an upper bound for the compressed length */
1047        ulg out_length = (ulg)s->last_lit*8L;
1048        ulg in_length = (ulg)((long)s->strstart - s->block_start);
1049        int dcode;
1050        for (dcode = 0; dcode < D_CODES; dcode++) {
1051            out_length += (ulg)s->dyn_dtree[dcode].Freq *
1052                (5L+extra_dbits[dcode]);
1053        }
1054        out_length >>= 3;
1055        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
1056               s->last_lit, in_length, out_length,
1057               100L - out_length*100L/in_length));
1058        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
1059    }
1060#endif
1061    return (s->last_lit == s->lit_bufsize-1);
1062    /* We avoid equality with lit_bufsize because of wraparound at 64K
1063     * on 16 bit machines and because stored blocks are restricted to
1064     * 64K-1 bytes.
1065     */
1066}
1067
1068/* ===========================================================================
1069 * Send the block data compressed using the given Huffman trees
1070 */
1071local void compress_block(s, ltree, dtree)
1072    deflate_state *s;
1073    ct_data *ltree; /* literal tree */
1074    ct_data *dtree; /* distance tree */
1075{
1076    unsigned dist;      /* distance of matched string */
1077    int lc;             /* match length or unmatched char (if dist == 0) */
1078    unsigned lx = 0;    /* running index in l_buf */
1079    unsigned code;      /* the code to send */
1080    int extra;          /* number of extra bits to send */
1081
1082    if (s->last_lit != 0) do {
1083        dist = s->d_buf[lx];
1084        lc = s->l_buf[lx++];
1085        if (dist == 0) {
1086            send_code(s, lc, ltree); /* send a literal byte */
1087            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
1088        } else {
1089            /* Here, lc is the match length - MIN_MATCH */
1090            code = _length_code[lc];
1091            send_code(s, code+LITERALS+1, ltree); /* send the length code */
1092            extra = extra_lbits[code];
1093            if (extra != 0) {
1094                lc -= base_length[code];
1095                send_bits(s, lc, extra);       /* send the extra length bits */
1096            }
1097            dist--; /* dist is now the match distance - 1 */
1098            code = d_code(dist);
1099            Assert (code < D_CODES, "bad d_code");
1100
1101            send_code(s, code, dtree);       /* send the distance code */
1102            extra = extra_dbits[code];
1103            if (extra != 0) {
1104                dist -= base_dist[code];
1105                send_bits(s, dist, extra);   /* send the extra distance bits */
1106            }
1107        } /* literal or match pair ? */
1108
1109        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1110        Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow");
1111
1112    } while (lx < s->last_lit);
1113
1114    send_code(s, END_BLOCK, ltree);
1115    s->last_eob_len = ltree[END_BLOCK].Len;
1116}
1117
1118/* ===========================================================================
1119 * Set the data type to ASCII or BINARY, using a crude approximation:
1120 * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
1121 * IN assertion: the fields freq of dyn_ltree are set and the total of all
1122 * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
1123 */
1124local void set_data_type(s)
1125    deflate_state *s;
1126{
1127    int n = 0;
1128    unsigned ascii_freq = 0;
1129    unsigned bin_freq = 0;
1130    while (n < 7)        bin_freq += s->dyn_ltree[n++].Freq;
1131    while (n < 128)    ascii_freq += s->dyn_ltree[n++].Freq;
1132    while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq;
1133    s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII);
1134}
1135
1136/* ===========================================================================
1137 * Reverse the first len bits of a code, using straightforward code (a faster
1138 * method would use a table)
1139 * IN assertion: 1 <= len <= 15
1140 */
1141local unsigned bi_reverse(code, len)
1142    unsigned code; /* the value to invert */
1143    int len;       /* its bit length */
1144{
1145    register unsigned res = 0;
1146    do {
1147        res |= code & 1;
1148        code >>= 1, res <<= 1;
1149    } while (--len > 0);
1150    return res >> 1;
1151}
1152
1153/* ===========================================================================
1154 * Flush the bit buffer, keeping at most 7 bits in it.
1155 */
1156local void bi_flush(s)
1157    deflate_state *s;
1158{
1159    if (s->bi_valid == 16) {
1160        put_short(s, s->bi_buf);
1161        s->bi_buf = 0;
1162        s->bi_valid = 0;
1163    } else if (s->bi_valid >= 8) {
1164        put_byte(s, (Byte)s->bi_buf);
1165        s->bi_buf >>= 8;
1166        s->bi_valid -= 8;
1167    }
1168}
1169
1170/* ===========================================================================
1171 * Flush the bit buffer and align the output on a byte boundary
1172 */
1173local void bi_windup(s)
1174    deflate_state *s;
1175{
1176    if (s->bi_valid > 8) {
1177        put_short(s, s->bi_buf);
1178    } else if (s->bi_valid > 0) {
1179        put_byte(s, (Byte)s->bi_buf);
1180    }
1181    s->bi_buf = 0;
1182    s->bi_valid = 0;
1183#ifdef DEBUG
1184    s->bits_sent = (s->bits_sent+7) & ~7;
1185#endif
1186}
1187
1188/* ===========================================================================
1189 * Copy a stored block, storing first the length and its
1190 * one's complement if requested.
1191 */
1192local void copy_block(s, buf, len, header)
1193    deflate_state *s;
1194    charf    *buf;    /* the input data */
1195    unsigned len;     /* its length */
1196    int      header;  /* true if block header must be written */
1197{
1198    bi_windup(s);        /* align on byte boundary */
1199    s->last_eob_len = 8; /* enough lookahead for inflate */
1200
1201    if (header) {
1202        put_short(s, (ush)len);   
1203        put_short(s, (ush)~len);
1204#ifdef DEBUG
1205        s->bits_sent += 2*16;
1206#endif
1207    }
1208#ifdef DEBUG
1209    s->bits_sent += (ulg)len<<3;
1210#endif
1211    while (len--) {
1212        put_byte(s, *buf++);
1213    }
1214}
Note: See TracBrowser for help on using the repository browser.