source: trunk/third/firefox/jpeg/jfdctfst.c @ 21695

Revision 21695, 7.4 KB checked in by rbasch, 20 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r21694, which included commits to RCS files with non-trunk default branches.
Line 
1/*
2 * jfdctfst.c
3 *
4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains a fast, not so accurate integer implementation of the
9 * forward DCT (Discrete Cosine Transform).
10 *
11 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
12 * on each column.  Direct algorithms are also available, but they are
13 * much more complex and seem not to be any faster when reduced to code.
14 *
15 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
16 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
17 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
18 * JPEG textbook (see REFERENCES section in file README).  The following code
19 * is based directly on figure 4-8 in P&M.
20 * While an 8-point DCT cannot be done in less than 11 multiplies, it is
21 * possible to arrange the computation so that many of the multiplies are
22 * simple scalings of the final outputs.  These multiplies can then be
23 * folded into the multiplications or divisions by the JPEG quantization
24 * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
25 * to be done in the DCT itself.
26 * The primary disadvantage of this method is that with fixed-point math,
27 * accuracy is lost due to imprecise representation of the scaled
28 * quantization values.  The smaller the quantization table entry, the less
29 * precise the scaled value, so this implementation does worse with high-
30 * quality-setting files than with low-quality ones.
31 */
32
33#define JPEG_INTERNALS
34#include "jinclude.h"
35#include "jpeglib.h"
36#include "jdct.h"               /* Private declarations for DCT subsystem */
37
38#ifdef DCT_IFAST_SUPPORTED
39
40
41/*
42 * This module is specialized to the case DCTSIZE = 8.
43 */
44
45#if DCTSIZE != 8
46  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
47#endif
48
49
50/* Scaling decisions are generally the same as in the LL&M algorithm;
51 * see jfdctint.c for more details.  However, we choose to descale
52 * (right shift) multiplication products as soon as they are formed,
53 * rather than carrying additional fractional bits into subsequent additions.
54 * This compromises accuracy slightly, but it lets us save a few shifts.
55 * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
56 * everywhere except in the multiplications proper; this saves a good deal
57 * of work on 16-bit-int machines.
58 *
59 * Again to save a few shifts, the intermediate results between pass 1 and
60 * pass 2 are not upscaled, but are represented only to integral precision.
61 *
62 * A final compromise is to represent the multiplicative constants to only
63 * 8 fractional bits, rather than 13.  This saves some shifting work on some
64 * machines, and may also reduce the cost of multiplication (since there
65 * are fewer one-bits in the constants).
66 */
67
68#define CONST_BITS  8
69
70
71/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
72 * causing a lot of useless floating-point operations at run time.
73 * To get around this we use the following pre-calculated constants.
74 * If you change CONST_BITS you may want to add appropriate values.
75 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
76 */
77
78#if CONST_BITS == 8
79#define FIX_0_382683433  ((INT32)   98)         /* FIX(0.382683433) */
80#define FIX_0_541196100  ((INT32)  139)         /* FIX(0.541196100) */
81#define FIX_0_707106781  ((INT32)  181)         /* FIX(0.707106781) */
82#define FIX_1_306562965  ((INT32)  334)         /* FIX(1.306562965) */
83#else
84#define FIX_0_382683433  FIX(0.382683433)
85#define FIX_0_541196100  FIX(0.541196100)
86#define FIX_0_707106781  FIX(0.707106781)
87#define FIX_1_306562965  FIX(1.306562965)
88#endif
89
90
91/* We can gain a little more speed, with a further compromise in accuracy,
92 * by omitting the addition in a descaling shift.  This yields an incorrectly
93 * rounded result half the time...
94 */
95
96#ifndef USE_ACCURATE_ROUNDING
97#undef DESCALE
98#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
99#endif
100
101
102/* Multiply a DCTELEM variable by an INT32 constant, and immediately
103 * descale to yield a DCTELEM result.
104 */
105
106#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
107
108
109/*
110 * Perform the forward DCT on one block of samples.
111 */
112
113GLOBAL(void)
114jpeg_fdct_ifast (DCTELEM * data)
115{
116  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
117  DCTELEM tmp10, tmp11, tmp12, tmp13;
118  DCTELEM z1, z2, z3, z4, z5, z11, z13;
119  DCTELEM *dataptr;
120  int ctr;
121  SHIFT_TEMPS
122
123  /* Pass 1: process rows. */
124
125  dataptr = data;
126  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
127    tmp0 = dataptr[0] + dataptr[7];
128    tmp7 = dataptr[0] - dataptr[7];
129    tmp1 = dataptr[1] + dataptr[6];
130    tmp6 = dataptr[1] - dataptr[6];
131    tmp2 = dataptr[2] + dataptr[5];
132    tmp5 = dataptr[2] - dataptr[5];
133    tmp3 = dataptr[3] + dataptr[4];
134    tmp4 = dataptr[3] - dataptr[4];
135   
136    /* Even part */
137   
138    tmp10 = tmp0 + tmp3;        /* phase 2 */
139    tmp13 = tmp0 - tmp3;
140    tmp11 = tmp1 + tmp2;
141    tmp12 = tmp1 - tmp2;
142   
143    dataptr[0] = tmp10 + tmp11; /* phase 3 */
144    dataptr[4] = tmp10 - tmp11;
145   
146    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
147    dataptr[2] = tmp13 + z1;    /* phase 5 */
148    dataptr[6] = tmp13 - z1;
149   
150    /* Odd part */
151
152    tmp10 = tmp4 + tmp5;        /* phase 2 */
153    tmp11 = tmp5 + tmp6;
154    tmp12 = tmp6 + tmp7;
155
156    /* The rotator is modified from fig 4-8 to avoid extra negations. */
157    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
158    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
159    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
160    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
161
162    z11 = tmp7 + z3;            /* phase 5 */
163    z13 = tmp7 - z3;
164
165    dataptr[5] = z13 + z2;      /* phase 6 */
166    dataptr[3] = z13 - z2;
167    dataptr[1] = z11 + z4;
168    dataptr[7] = z11 - z4;
169
170    dataptr += DCTSIZE;         /* advance pointer to next row */
171  }
172
173  /* Pass 2: process columns. */
174
175  dataptr = data;
176  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
177    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
178    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
179    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
180    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
181    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
182    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
183    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
184    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
185   
186    /* Even part */
187   
188    tmp10 = tmp0 + tmp3;        /* phase 2 */
189    tmp13 = tmp0 - tmp3;
190    tmp11 = tmp1 + tmp2;
191    tmp12 = tmp1 - tmp2;
192   
193    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
194    dataptr[DCTSIZE*4] = tmp10 - tmp11;
195   
196    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
197    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
198    dataptr[DCTSIZE*6] = tmp13 - z1;
199   
200    /* Odd part */
201
202    tmp10 = tmp4 + tmp5;        /* phase 2 */
203    tmp11 = tmp5 + tmp6;
204    tmp12 = tmp6 + tmp7;
205
206    /* The rotator is modified from fig 4-8 to avoid extra negations. */
207    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
208    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
209    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
210    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
211
212    z11 = tmp7 + z3;            /* phase 5 */
213    z13 = tmp7 - z3;
214
215    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
216    dataptr[DCTSIZE*3] = z13 - z2;
217    dataptr[DCTSIZE*1] = z11 + z4;
218    dataptr[DCTSIZE*7] = z11 - z4;
219
220    dataptr++;                  /* advance pointer to next column */
221  }
222}
223
224#endif /* DCT_IFAST_SUPPORTED */
Note: See TracBrowser for help on using the repository browser.