source: trunk/third/jpeg/jidctflt.c @ 15227

Revision 15227, 8.3 KB checked in by ghudson, 24 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r15226, which included commits to RCS files with non-trunk default branches.
Line 
1/*
2 * jidctflt.c
3 *
4 * Copyright (C) 1994-1998, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains a floating-point implementation of the
9 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
10 * must also perform dequantization of the input coefficients.
11 *
12 * This implementation should be more accurate than either of the integer
13 * IDCT implementations.  However, it may not give the same results on all
14 * machines because of differences in roundoff behavior.  Speed will depend
15 * on the hardware's floating point capacity.
16 *
17 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
18 * on each row (or vice versa, but it's more convenient to emit a row at
19 * a time).  Direct algorithms are also available, but they are much more
20 * complex and seem not to be any faster when reduced to code.
21 *
22 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
23 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
24 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
25 * JPEG textbook (see REFERENCES section in file README).  The following code
26 * is based directly on figure 4-8 in P&M.
27 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
28 * possible to arrange the computation so that many of the multiplies are
29 * simple scalings of the final outputs.  These multiplies can then be
30 * folded into the multiplications or divisions by the JPEG quantization
31 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
32 * to be done in the DCT itself.
33 * The primary disadvantage of this method is that with a fixed-point
34 * implementation, accuracy is lost due to imprecise representation of the
35 * scaled quantization values.  However, that problem does not arise if
36 * we use floating point arithmetic.
37 */
38
39#define JPEG_INTERNALS
40#include "jinclude.h"
41#include "jpeglib.h"
42#include "jdct.h"               /* Private declarations for DCT subsystem */
43
44#ifdef DCT_FLOAT_SUPPORTED
45
46
47/*
48 * This module is specialized to the case DCTSIZE = 8.
49 */
50
51#if DCTSIZE != 8
52  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
53#endif
54
55
56/* Dequantize a coefficient by multiplying it by the multiplier-table
57 * entry; produce a float result.
58 */
59
60#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
61
62
63/*
64 * Perform dequantization and inverse DCT on one block of coefficients.
65 */
66
67GLOBAL(void)
68jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
69                 JCOEFPTR coef_block,
70                 JSAMPARRAY output_buf, JDIMENSION output_col)
71{
72  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
73  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
74  FAST_FLOAT z5, z10, z11, z12, z13;
75  JCOEFPTR inptr;
76  FLOAT_MULT_TYPE * quantptr;
77  FAST_FLOAT * wsptr;
78  JSAMPROW outptr;
79  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
80  int ctr;
81  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
82  SHIFT_TEMPS
83
84  /* Pass 1: process columns from input, store into work array. */
85
86  inptr = coef_block;
87  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
88  wsptr = workspace;
89  for (ctr = DCTSIZE; ctr > 0; ctr--) {
90    /* Due to quantization, we will usually find that many of the input
91     * coefficients are zero, especially the AC terms.  We can exploit this
92     * by short-circuiting the IDCT calculation for any column in which all
93     * the AC terms are zero.  In that case each output is equal to the
94     * DC coefficient (with scale factor as needed).
95     * With typical images and quantization tables, half or more of the
96     * column DCT calculations can be simplified this way.
97     */
98   
99    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
100        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
101        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
102        inptr[DCTSIZE*7] == 0) {
103      /* AC terms all zero */
104      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
105     
106      wsptr[DCTSIZE*0] = dcval;
107      wsptr[DCTSIZE*1] = dcval;
108      wsptr[DCTSIZE*2] = dcval;
109      wsptr[DCTSIZE*3] = dcval;
110      wsptr[DCTSIZE*4] = dcval;
111      wsptr[DCTSIZE*5] = dcval;
112      wsptr[DCTSIZE*6] = dcval;
113      wsptr[DCTSIZE*7] = dcval;
114     
115      inptr++;                  /* advance pointers to next column */
116      quantptr++;
117      wsptr++;
118      continue;
119    }
120   
121    /* Even part */
122
123    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
124    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
125    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
126    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
127
128    tmp10 = tmp0 + tmp2;        /* phase 3 */
129    tmp11 = tmp0 - tmp2;
130
131    tmp13 = tmp1 + tmp3;        /* phases 5-3 */
132    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
133
134    tmp0 = tmp10 + tmp13;       /* phase 2 */
135    tmp3 = tmp10 - tmp13;
136    tmp1 = tmp11 + tmp12;
137    tmp2 = tmp11 - tmp12;
138   
139    /* Odd part */
140
141    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
142    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
143    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
144    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
145
146    z13 = tmp6 + tmp5;          /* phase 6 */
147    z10 = tmp6 - tmp5;
148    z11 = tmp4 + tmp7;
149    z12 = tmp4 - tmp7;
150
151    tmp7 = z11 + z13;           /* phase 5 */
152    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
153
154    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
155    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
156    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
157
158    tmp6 = tmp12 - tmp7;        /* phase 2 */
159    tmp5 = tmp11 - tmp6;
160    tmp4 = tmp10 + tmp5;
161
162    wsptr[DCTSIZE*0] = tmp0 + tmp7;
163    wsptr[DCTSIZE*7] = tmp0 - tmp7;
164    wsptr[DCTSIZE*1] = tmp1 + tmp6;
165    wsptr[DCTSIZE*6] = tmp1 - tmp6;
166    wsptr[DCTSIZE*2] = tmp2 + tmp5;
167    wsptr[DCTSIZE*5] = tmp2 - tmp5;
168    wsptr[DCTSIZE*4] = tmp3 + tmp4;
169    wsptr[DCTSIZE*3] = tmp3 - tmp4;
170
171    inptr++;                    /* advance pointers to next column */
172    quantptr++;
173    wsptr++;
174  }
175 
176  /* Pass 2: process rows from work array, store into output array. */
177  /* Note that we must descale the results by a factor of 8 == 2**3. */
178
179  wsptr = workspace;
180  for (ctr = 0; ctr < DCTSIZE; ctr++) {
181    outptr = output_buf[ctr] + output_col;
182    /* Rows of zeroes can be exploited in the same way as we did with columns.
183     * However, the column calculation has created many nonzero AC terms, so
184     * the simplification applies less often (typically 5% to 10% of the time).
185     * And testing floats for zero is relatively expensive, so we don't bother.
186     */
187   
188    /* Even part */
189
190    tmp10 = wsptr[0] + wsptr[4];
191    tmp11 = wsptr[0] - wsptr[4];
192
193    tmp13 = wsptr[2] + wsptr[6];
194    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
195
196    tmp0 = tmp10 + tmp13;
197    tmp3 = tmp10 - tmp13;
198    tmp1 = tmp11 + tmp12;
199    tmp2 = tmp11 - tmp12;
200
201    /* Odd part */
202
203    z13 = wsptr[5] + wsptr[3];
204    z10 = wsptr[5] - wsptr[3];
205    z11 = wsptr[1] + wsptr[7];
206    z12 = wsptr[1] - wsptr[7];
207
208    tmp7 = z11 + z13;
209    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
210
211    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
212    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
213    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
214
215    tmp6 = tmp12 - tmp7;
216    tmp5 = tmp11 - tmp6;
217    tmp4 = tmp10 + tmp5;
218
219    /* Final output stage: scale down by a factor of 8 and range-limit */
220
221    outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)
222                            & RANGE_MASK];
223    outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)
224                            & RANGE_MASK];
225    outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)
226                            & RANGE_MASK];
227    outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)
228                            & RANGE_MASK];
229    outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)
230                            & RANGE_MASK];
231    outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)
232                            & RANGE_MASK];
233    outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)
234                            & RANGE_MASK];
235    outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)
236                            & RANGE_MASK];
237   
238    wsptr += DCTSIZE;           /* advance pointer to next row */
239  }
240}
241
242#endif /* DCT_FLOAT_SUPPORTED */
Note: See TracBrowser for help on using the repository browser.