1 | /* |
---|
2 | * jdmerge.c |
---|
3 | * |
---|
4 | * Copyright (C) 1994-1996, Thomas G. Lane. |
---|
5 | * This file is part of the Independent JPEG Group's software. |
---|
6 | * For conditions of distribution and use, see the accompanying README file. |
---|
7 | * |
---|
8 | * This file contains code for merged upsampling/color conversion. |
---|
9 | * |
---|
10 | * This file combines functions from jdsample.c and jdcolor.c; |
---|
11 | * read those files first to understand what's going on. |
---|
12 | * |
---|
13 | * When the chroma components are to be upsampled by simple replication |
---|
14 | * (ie, box filtering), we can save some work in color conversion by |
---|
15 | * calculating all the output pixels corresponding to a pair of chroma |
---|
16 | * samples at one time. In the conversion equations |
---|
17 | * R = Y + K1 * Cr |
---|
18 | * G = Y + K2 * Cb + K3 * Cr |
---|
19 | * B = Y + K4 * Cb |
---|
20 | * only the Y term varies among the group of pixels corresponding to a pair |
---|
21 | * of chroma samples, so the rest of the terms can be calculated just once. |
---|
22 | * At typical sampling ratios, this eliminates half or three-quarters of the |
---|
23 | * multiplications needed for color conversion. |
---|
24 | * |
---|
25 | * This file currently provides implementations for the following cases: |
---|
26 | * YCbCr => RGB color conversion only. |
---|
27 | * Sampling ratios of 2h1v or 2h2v. |
---|
28 | * No scaling needed at upsample time. |
---|
29 | * Corner-aligned (non-CCIR601) sampling alignment. |
---|
30 | * Other special cases could be added, but in most applications these are |
---|
31 | * the only common cases. (For uncommon cases we fall back on the more |
---|
32 | * general code in jdsample.c and jdcolor.c.) |
---|
33 | */ |
---|
34 | |
---|
35 | #define JPEG_INTERNALS |
---|
36 | #include "jinclude.h" |
---|
37 | #include "jpeglib.h" |
---|
38 | |
---|
39 | #ifdef UPSAMPLE_MERGING_SUPPORTED |
---|
40 | |
---|
#ifdef HAVE_MMX_INTEL_MNEMONICS
/*
 * Packed operands for the MMX 2h2v routine.  Each __int64 holds four signed
 * 16-bit lanes; the trailing comment names the lanes from most significant
 * to least significant.  The nonzero multipliers are the YCbCr->RGB
 * coefficients scaled by 2^14 (for example 0x59BA = 22970 ~= 1.40200 * 16384,
 * 0xD24B = -11701 ~= -0.71414 * 16384); a zero lane means that chroma
 * component does not contribute to that output channel.
 */
__int64 const1 = 0x59BA0000D24B59BA;	// Cr_r Cr_b Cr_g Cr_r
__int64 const2 = 0x00007168E9FA0000;	// Cb_r Cb_b Cb_g Cb_r
__int64 const5 = 0x0000D24B59BA0000;	// Cr_b Cr_g Cr_r Cr_b
__int64 const6 = 0x7168E9FA00007168;	// Cb_b Cb_g Cb_r Cb_b

// Rounding offsets, (One_Half/fix(x)) << 2, added lane by lane just before
// the matching multiply (const05 with const1, const15 with const2,
// const45 with const5, const55 with const6).

__int64 const05 = 0x0001000000000001;	// Cr_r Cr_b Cr_g Cr_r
__int64 const15 = 0x00000001FFFA0000;	// Cb_r Cb_b Cb_g Cb_r
__int64 const45 = 0x0000000000010000;	// Cr_b Cr_g Cr_r Cr_b
__int64 const55 = 0x0001FFFA00000001;	// Cb_b Cb_g Cb_r Cb_b
#endif
54 | |
---|
/* Private subobject: state of the merged upsampler/color converter. */

typedef struct {
  struct jpeg_upsampler pub;	/* public fields */

  /* Pointer to routine to do actual upsampling/conversion of one row group.
   * The control routines (merged_1v_upsample / merged_2v_upsample) call it
   * with the row-group index and the destination row pointers.
   */
  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
			   JSAMPARRAY output_buf));

  /* Private state for YCC->RGB conversion.
   * Each table has MAXJSAMPLE+1 entries and is indexed by the raw chroma
   * sample value.  The R and B tables hold already-rounded integer offsets;
   * the two G tables hold values still scaled by 2^SCALEBITS (Cb_g_tab also
   * carries the ONE_HALF rounding term), so their sum is shifted once.
   */
  int * Cr_r_tab;		/* => table for Cr to R conversion */
  int * Cb_b_tab;		/* => table for Cb to B conversion */
  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */

  /* For 2:1 vertical sampling, we produce two output rows at a time.
   * We need a "spare" row buffer to hold the second output row if the
   * application provides just a one-row buffer; we also use the spare
   * to discard the dummy last row if the image height is odd.
   */
  JSAMPROW spare_row;
  boolean spare_full;		/* T if spare buffer is occupied */

  JDIMENSION out_row_width;	/* samples per output row */
  JDIMENSION rows_to_go;	/* counts rows remaining in image */
} my_upsampler;

typedef my_upsampler * my_upsample_ptr;
84 | |
---|
/*
 * Fixed-point helpers for the conversion tables: scaled values carry
 * SCALEBITS fractional bits.  ONE_HALF is 0.5 in that representation and
 * FIX(x) converts a floating constant to it, rounding to nearest.
 */
#define SCALEBITS	16	/* speediest right-shift on some machines */
#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
88 | |
---|
89 | |
---|
90 | /* |
---|
91 | * Initialize tables for YCC->RGB colorspace conversion. |
---|
92 | * This is taken directly from jdcolor.c; see that file for more info. |
---|
93 | */ |
---|
94 | |
---|
95 | LOCAL(void) |
---|
96 | build_ycc_rgb_table (j_decompress_ptr cinfo) |
---|
97 | { |
---|
98 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
99 | int i; |
---|
100 | INT32 x; |
---|
101 | SHIFT_TEMPS |
---|
102 | |
---|
103 | upsample->Cr_r_tab = (int *) |
---|
104 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
---|
105 | (MAXJSAMPLE+1) * SIZEOF(int)); |
---|
106 | upsample->Cb_b_tab = (int *) |
---|
107 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
---|
108 | (MAXJSAMPLE+1) * SIZEOF(int)); |
---|
109 | upsample->Cr_g_tab = (INT32 *) |
---|
110 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
---|
111 | (MAXJSAMPLE+1) * SIZEOF(INT32)); |
---|
112 | upsample->Cb_g_tab = (INT32 *) |
---|
113 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
---|
114 | (MAXJSAMPLE+1) * SIZEOF(INT32)); |
---|
115 | |
---|
116 | for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { |
---|
117 | /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ |
---|
118 | /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */ |
---|
119 | /* Cr=>R value is nearest int to 1.40200 * x */ |
---|
120 | upsample->Cr_r_tab[i] = (int) |
---|
121 | RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); |
---|
122 | /* Cb=>B value is nearest int to 1.77200 * x */ |
---|
123 | upsample->Cb_b_tab[i] = (int) |
---|
124 | RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); |
---|
125 | /* Cr=>G value is scaled-up -0.71414 * x */ |
---|
126 | upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x; |
---|
127 | /* Cb=>G value is scaled-up -0.34414 * x */ |
---|
128 | /* We also add in ONE_HALF so that need not do it in inner loop */ |
---|
129 | upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF; |
---|
130 | } |
---|
131 | } |
---|
132 | |
---|
133 | |
---|
134 | /* |
---|
135 | * Initialize for an upsampling pass. |
---|
136 | */ |
---|
137 | |
---|
138 | METHODDEF(void) |
---|
139 | start_pass_merged_upsample (j_decompress_ptr cinfo) |
---|
140 | { |
---|
141 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
142 | |
---|
143 | /* Mark the spare buffer empty */ |
---|
144 | upsample->spare_full = FALSE; |
---|
145 | /* Initialize total-height counter for detecting bottom of image */ |
---|
146 | upsample->rows_to_go = cinfo->output_height; |
---|
147 | } |
---|
148 | |
---|
149 | |
---|
150 | /* |
---|
151 | * Control routine to do upsampling (and color conversion). |
---|
152 | * |
---|
153 | * The control routine just handles the row buffering considerations. |
---|
154 | */ |
---|
155 | |
---|
156 | METHODDEF(void) |
---|
157 | merged_2v_upsample (j_decompress_ptr cinfo, |
---|
158 | JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, |
---|
159 | JDIMENSION in_row_groups_avail, |
---|
160 | JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, |
---|
161 | JDIMENSION out_rows_avail) |
---|
162 | /* 2:1 vertical sampling case: may need a spare row. */ |
---|
163 | { |
---|
164 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
165 | JSAMPROW work_ptrs[2]; |
---|
166 | JDIMENSION num_rows; /* number of rows returned to caller */ |
---|
167 | |
---|
168 | if (upsample->spare_full) { |
---|
169 | /* If we have a spare row saved from a previous cycle, just return it. */ |
---|
170 | jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0, |
---|
171 | 1, upsample->out_row_width); |
---|
172 | num_rows = 1; |
---|
173 | upsample->spare_full = FALSE; |
---|
174 | } else { |
---|
175 | /* Figure number of rows to return to caller. */ |
---|
176 | num_rows = 2; |
---|
177 | /* Not more than the distance to the end of the image. */ |
---|
178 | if (num_rows > upsample->rows_to_go) |
---|
179 | num_rows = upsample->rows_to_go; |
---|
180 | /* And not more than what the client can accept: */ |
---|
181 | out_rows_avail -= *out_row_ctr; |
---|
182 | if (num_rows > out_rows_avail) |
---|
183 | num_rows = out_rows_avail; |
---|
184 | /* Create output pointer array for upsampler. */ |
---|
185 | work_ptrs[0] = output_buf[*out_row_ctr]; |
---|
186 | if (num_rows > 1) { |
---|
187 | work_ptrs[1] = output_buf[*out_row_ctr + 1]; |
---|
188 | } else { |
---|
189 | work_ptrs[1] = upsample->spare_row; |
---|
190 | upsample->spare_full = TRUE; |
---|
191 | } |
---|
192 | /* Now do the upsampling. */ |
---|
193 | (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs); |
---|
194 | } |
---|
195 | |
---|
196 | /* Adjust counts */ |
---|
197 | *out_row_ctr += num_rows; |
---|
198 | upsample->rows_to_go -= num_rows; |
---|
199 | /* When the buffer is emptied, declare this input row group consumed */ |
---|
200 | if (! upsample->spare_full) |
---|
201 | (*in_row_group_ctr)++; |
---|
202 | } |
---|
203 | |
---|
204 | |
---|
205 | METHODDEF(void) |
---|
206 | merged_1v_upsample (j_decompress_ptr cinfo, |
---|
207 | JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr, |
---|
208 | JDIMENSION in_row_groups_avail, |
---|
209 | JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, |
---|
210 | JDIMENSION out_rows_avail) |
---|
211 | /* 1:1 vertical sampling case: much easier, never need a spare row. */ |
---|
212 | { |
---|
213 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
214 | |
---|
215 | /* Just do the upsampling. */ |
---|
216 | (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, |
---|
217 | output_buf + *out_row_ctr); |
---|
218 | /* Adjust counts */ |
---|
219 | (*out_row_ctr)++; |
---|
220 | (*in_row_group_ctr)++; |
---|
221 | } |
---|
222 | |
---|
223 | |
---|
224 | /* |
---|
225 | * These are the routines invoked by the control routines to do |
---|
226 | * the actual upsampling/conversion. One row group is processed per call. |
---|
227 | * |
---|
228 | * Note: since we may be writing directly into application-supplied buffers, |
---|
229 | * we have to be honest about the output width; we can't assume the buffer |
---|
230 | * has been rounded up to an even width. |
---|
231 | */ |
---|
232 | |
---|
233 | |
---|
234 | /* |
---|
235 | * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. |
---|
236 | */ |
---|
237 | |
---|
238 | METHODDEF(void) |
---|
239 | h2v1_merged_upsample (j_decompress_ptr cinfo, |
---|
240 | JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, |
---|
241 | JSAMPARRAY output_buf) |
---|
242 | { |
---|
243 | |
---|
244 | |
---|
245 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
246 | register int y, cred, cgreen, cblue; |
---|
247 | int cb, cr; |
---|
248 | register JSAMPROW outptr; |
---|
249 | JSAMPROW inptr0, inptr1, inptr2; |
---|
250 | JDIMENSION col; |
---|
251 | /* copy these pointers into registers if possible */ |
---|
252 | register JSAMPLE * range_limit = cinfo->sample_range_limit; |
---|
253 | int * Crrtab = upsample->Cr_r_tab; |
---|
254 | int * Cbbtab = upsample->Cb_b_tab; |
---|
255 | INT32 * Crgtab = upsample->Cr_g_tab; |
---|
256 | INT32 * Cbgtab = upsample->Cb_g_tab; |
---|
257 | SHIFT_TEMPS |
---|
258 | |
---|
259 | inptr0 = input_buf[0][in_row_group_ctr]; |
---|
260 | inptr1 = input_buf[1][in_row_group_ctr]; |
---|
261 | inptr2 = input_buf[2][in_row_group_ctr]; |
---|
262 | outptr = output_buf[0]; |
---|
263 | /* Loop for each pair of output pixels */ |
---|
264 | for (col = cinfo->output_width >> 1; col > 0; col--) { |
---|
265 | /* Do the chroma part of the calculation */ |
---|
266 | cb = GETJSAMPLE(*inptr1++); |
---|
267 | cr = GETJSAMPLE(*inptr2++); |
---|
268 | cred = Crrtab[cr]; |
---|
269 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
270 | cblue = Cbbtab[cb]; |
---|
271 | /* Fetch 2 Y values and emit 2 pixels */ |
---|
272 | y = GETJSAMPLE(*inptr0++); |
---|
273 | outptr[RGB_RED] = range_limit[y + cred]; |
---|
274 | outptr[RGB_GREEN] = range_limit[y + cgreen]; |
---|
275 | outptr[RGB_BLUE] = range_limit[y + cblue]; |
---|
276 | outptr += RGB_PIXELSIZE; |
---|
277 | y = GETJSAMPLE(*inptr0++); |
---|
278 | outptr[RGB_RED] = range_limit[y + cred]; |
---|
279 | outptr[RGB_GREEN] = range_limit[y + cgreen]; |
---|
280 | outptr[RGB_BLUE] = range_limit[y + cblue]; |
---|
281 | outptr += RGB_PIXELSIZE; |
---|
282 | } |
---|
283 | /* If image width is odd, do the last output column separately */ |
---|
284 | if (cinfo->output_width & 1) { |
---|
285 | cb = GETJSAMPLE(*inptr1); |
---|
286 | cr = GETJSAMPLE(*inptr2); |
---|
287 | cred = Crrtab[cr]; |
---|
288 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
289 | cblue = Cbbtab[cb]; |
---|
290 | y = GETJSAMPLE(*inptr0); |
---|
291 | outptr[RGB_RED] = range_limit[y + cred]; |
---|
292 | outptr[RGB_GREEN] = range_limit[y + cgreen]; |
---|
293 | outptr[RGB_BLUE] = range_limit[y + cblue]; |
---|
294 | } |
---|
295 | } |
---|
296 | |
---|
297 | |
---|
/*
 * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
 *
 * Forward declarations.  When MMX support is compiled in,
 * h2v2_merged_upsample is a dispatcher that calls either the portable
 * (_orig) or the MMX (_mmx) implementation, both of which are defined
 * after it.
 */

#ifdef HAVE_MMX_INTEL_MNEMONICS
__inline METHODDEF(void)
h2v2_merged_upsample_orig (j_decompress_ptr cinfo,
                      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
                      JSAMPARRAY output_buf);
__inline METHODDEF(void)
h2v2_merged_upsample_mmx (j_decompress_ptr cinfo,
                      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
                      JSAMPARRAY output_buf);
#endif

METHODDEF(void)
h2v2_merged_upsample (j_decompress_ptr cinfo,
                      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
                      JSAMPARRAY output_buf);
317 | |
---|
318 | #ifdef HAVE_MMX_INTEL_MNEMONICS |
---|
319 | METHODDEF(void) |
---|
320 | h2v2_merged_upsample (j_decompress_ptr cinfo, |
---|
321 | JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, |
---|
322 | JSAMPARRAY output_buf) |
---|
323 | { |
---|
324 | if (MMXAvailable && (cinfo->image_width >= 8)) |
---|
325 | h2v2_merged_upsample_mmx (cinfo, input_buf, in_row_group_ctr, output_buf); |
---|
326 | else |
---|
327 | h2v2_merged_upsample_orig (cinfo, input_buf, in_row_group_ctr, output_buf); |
---|
328 | |
---|
329 | } |
---|
330 | |
---|
331 | __inline METHODDEF(void) |
---|
332 | h2v2_merged_upsample_orig (j_decompress_ptr cinfo, |
---|
333 | JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, |
---|
334 | JSAMPARRAY output_buf) |
---|
335 | { |
---|
336 | |
---|
337 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
338 | register int y, cred, cgreen, cblue; |
---|
339 | int cb, cr; |
---|
340 | register JSAMPROW outptr0, outptr1; |
---|
341 | JSAMPROW inptr00, inptr01, inptr1, inptr2; |
---|
342 | JDIMENSION col; |
---|
343 | /* copy these pointers into registers if possible */ |
---|
344 | register JSAMPLE * range_limit = cinfo->sample_range_limit; |
---|
345 | int * Crrtab = upsample->Cr_r_tab; |
---|
346 | int * Cbbtab = upsample->Cb_b_tab; |
---|
347 | INT32 * Crgtab = upsample->Cr_g_tab; |
---|
348 | INT32 * Cbgtab = upsample->Cb_g_tab; |
---|
349 | SHIFT_TEMPS |
---|
350 | |
---|
351 | inptr00 = input_buf[0][in_row_group_ctr*2]; |
---|
352 | inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; |
---|
353 | inptr1 = input_buf[1][in_row_group_ctr]; |
---|
354 | inptr2 = input_buf[2][in_row_group_ctr]; |
---|
355 | outptr0 = output_buf[0]; |
---|
356 | outptr1 = output_buf[1]; |
---|
357 | /* Loop for each group of output pixels */ |
---|
358 | for (col = cinfo->output_width >> 1; col > 0; col--) { |
---|
359 | /* Do the chroma part of the calculation */ |
---|
360 | cb = GETJSAMPLE(*inptr1++); |
---|
361 | cr = GETJSAMPLE(*inptr2++); |
---|
362 | cred = Crrtab[cr]; |
---|
363 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
364 | cblue = Cbbtab[cb]; |
---|
365 | /* Fetch 4 Y values and emit 4 pixels */ |
---|
366 | y = GETJSAMPLE(*inptr00++); |
---|
367 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
368 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
369 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
370 | outptr0 += RGB_PIXELSIZE; |
---|
371 | y = GETJSAMPLE(*inptr00++); |
---|
372 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
373 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
374 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
375 | outptr0 += RGB_PIXELSIZE; |
---|
376 | y = GETJSAMPLE(*inptr01++); |
---|
377 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
378 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
379 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
380 | outptr1 += RGB_PIXELSIZE; |
---|
381 | y = GETJSAMPLE(*inptr01++); |
---|
382 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
383 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
384 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
385 | outptr1 += RGB_PIXELSIZE; |
---|
386 | } |
---|
387 | /* If image width is odd, do the last output column separately */ |
---|
388 | if (cinfo->output_width & 1) { |
---|
389 | cb = GETJSAMPLE(*inptr1); |
---|
390 | cr = GETJSAMPLE(*inptr2); |
---|
391 | cred = Crrtab[cr]; |
---|
392 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
393 | cblue = Cbbtab[cb]; |
---|
394 | y = GETJSAMPLE(*inptr00); |
---|
395 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
396 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
397 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
398 | y = GETJSAMPLE(*inptr01); |
---|
399 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
400 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
401 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
402 | } |
---|
403 | } |
---|
404 | |
---|
405 | /* |
---|
406 | * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical. |
---|
407 | */ |
---|
408 | __inline METHODDEF(void) |
---|
409 | h2v2_merged_upsample_mmx (j_decompress_ptr cinfo, |
---|
410 | JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, |
---|
411 | JSAMPARRAY output_buf) |
---|
412 | { |
---|
413 | // added for MMX |
---|
414 | __int64 const128 = 0x0080008000800080; |
---|
415 | __int64 empty = 0x0000000000000000; |
---|
416 | __int64 davemask = 0x0000FFFFFFFF0000; |
---|
417 | //////////////////////////////// |
---|
418 | |
---|
419 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
420 | register int y, cred, cgreen, cblue; |
---|
421 | int cb, cr; |
---|
422 | register JSAMPROW outptr0, outptr1; |
---|
423 | JSAMPROW inptr00, inptr01, inptr1, inptr2; |
---|
424 | JDIMENSION col; |
---|
425 | /* copy these pointers into registers if possible */ |
---|
426 | register JSAMPLE * range_limit = cinfo->sample_range_limit; |
---|
427 | int * Crrtab = upsample->Cr_r_tab; |
---|
428 | int * Cbbtab = upsample->Cb_b_tab; |
---|
429 | INT32 * Crgtab = upsample->Cr_g_tab; |
---|
430 | INT32 * Cbgtab = upsample->Cb_g_tab; |
---|
431 | SHIFT_TEMPS |
---|
432 | |
---|
433 | |
---|
434 | // Added for MMX |
---|
435 | register int width = cinfo->image_width; |
---|
436 | int cols = cinfo->output_width; |
---|
437 | int cols_asm = (cols >> 3); |
---|
438 | int diff = cols - (cols_asm<<3); |
---|
439 | int cols_asm_copy = cols_asm; |
---|
440 | |
---|
441 | /////////////////////////////////////// |
---|
442 | |
---|
443 | inptr00 = input_buf[0][in_row_group_ctr*2]; |
---|
444 | inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; |
---|
445 | inptr1 = input_buf[1][in_row_group_ctr]; |
---|
446 | inptr2 = input_buf[2][in_row_group_ctr]; |
---|
447 | outptr0 = output_buf[0]; |
---|
448 | outptr1 = output_buf[1]; |
---|
449 | /* Loop for each group of output pixels */ |
---|
450 | |
---|
451 | |
---|
452 | _asm |
---|
453 | { |
---|
454 | mov esi, inptr00 |
---|
455 | |
---|
456 | mov eax, inptr01 |
---|
457 | |
---|
458 | mov ebx, inptr2 |
---|
459 | |
---|
460 | mov ecx, inptr1 |
---|
461 | |
---|
462 | mov edi, outptr0 |
---|
463 | |
---|
464 | mov edx, outptr1 |
---|
465 | |
---|
466 | do_next16: |
---|
467 | |
---|
468 | movd mm0, [ebx] ; Cr7 Cr6.....Cr1 Cr0 |
---|
469 | |
---|
470 | pxor mm6, mm6 |
---|
471 | |
---|
472 | punpcklbw mm0, mm0 ; Cr3 Cr3 Cr2 Cr2 Cr1 Cr1 Cr0 Cr0 |
---|
473 | |
---|
474 | movq mm7, const128 |
---|
475 | |
---|
476 | punpcklwd mm0, mm0 ; Cr1 Cr1 Cr1 Cr1 Cr0 Cr0 Cr0 Cr0 |
---|
477 | |
---|
478 | movq mm4, mm0 |
---|
479 | |
---|
480 | punpcklbw mm0, mm6 ; Cr0 Cr0 Cr0 Cr0 |
---|
481 | |
---|
482 | psubsw mm0, mm7 ; Cr0 - 128:Cr0-128:Cr0-128:Cr0 -128 |
---|
483 | |
---|
484 | movd mm1, [ecx] ; Cb7 Cb6...... Cb1 Cb0 |
---|
485 | |
---|
486 | psllw mm0, 2 ; left shift by 2 bits |
---|
487 | |
---|
488 | punpcklbw mm1, mm1 ; Cb3 Cb3 Cb2 Cb2 Cb1 Cb1 Cb0 Cb0 |
---|
489 | |
---|
490 | paddsw mm0, const05 ; add (one_half/fix(x)) << 2 |
---|
491 | |
---|
492 | punpcklwd mm1, mm1 ; Cb1 Cb1 Cb1 Cb1 Cb0 Cb0 Cb0 Cb0 |
---|
493 | |
---|
494 | movq mm5, mm1 |
---|
495 | |
---|
496 | pmulhw mm0, const1 ; multiply by (fix(x) >> 1) |
---|
497 | |
---|
498 | punpcklbw mm1, mm6 ; Cb0 Cb0 Cb0 Cb0 |
---|
499 | |
---|
500 | punpckhbw mm4, mm6 ; Cr1 Cr1 Cr1 Cr1 |
---|
501 | |
---|
502 | psubsw mm1, mm7 ; Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128 |
---|
503 | |
---|
504 | punpckhbw mm5, mm6 ; Cb1 Cb1 Cb1 Cb1 |
---|
505 | |
---|
506 | psllw mm1, 2 ; left shift by 2 bits |
---|
507 | |
---|
508 | paddsw mm1, const15 ; add (one_half/fix(x)) << 2 |
---|
509 | |
---|
510 | psubsw mm4, mm7 ; Cr1 - 128:Cr1-128:Cr1-128:Cr1 -128 |
---|
511 | |
---|
512 | psubsw mm5, mm7 ; Cb1 - 128:Cb1-128:Cb1-128:Cb1 -128 |
---|
513 | |
---|
514 | pmulhw mm1, const2 ; multiply by (fix(x) >> 1) |
---|
515 | |
---|
516 | psllw mm4, 2 ; left shift by 2 bits |
---|
517 | |
---|
518 | psllw mm5, 2 ; left shift by 2 bits |
---|
519 | |
---|
520 | paddsw mm4, const45 ; add (one_half/fix(x)) << 2 |
---|
521 | |
---|
522 | movd mm7, [esi] ; Y13 Y12 Y9 Y8 Y5 Y4 Y1 Y0 |
---|
523 | |
---|
524 | pmulhw mm4, const5 ; multiply by (fix(x) >> 1) |
---|
525 | |
---|
526 | movq mm6, mm7 |
---|
527 | |
---|
528 | punpcklbw mm7, mm7 ; Y5 Y5 Y4 Y4 Y1 Y1 Y0 Y0 |
---|
529 | |
---|
530 | paddsw mm5, const55 ; add (one_half/fix(x)) << 2 |
---|
531 | |
---|
532 | paddsw mm0, mm1 ; cred0 cbl0 cgr0 cred0 |
---|
533 | |
---|
534 | movq mm1, mm7 |
---|
535 | |
---|
536 | pmulhw mm5, const6 ; multiply by (fix(x) >> 1) |
---|
537 | |
---|
538 | movq mm2, mm0 ; cred0 cbl0 cgr0 cred0 |
---|
539 | |
---|
540 | punpcklwd mm7, mm6 ; Y5 Y4 Y1 Y1 Y1 Y0 Y0 Y0 |
---|
541 | |
---|
542 | pand mm2, davemask ; 0 cbl0 cgr0 0 |
---|
543 | |
---|
544 | psrlq mm1, 16 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1 |
---|
545 | |
---|
546 | psrlq mm2, 16 ; 0 0 cbl0 cgr0 |
---|
547 | |
---|
548 | punpcklbw mm7, empty ; Y1 Y0 Y0 Y0 |
---|
549 | |
---|
550 | paddsw mm4, mm5 ; cbl1 cgr1 cred1 cbl1 |
---|
551 | |
---|
552 | movq mm3, mm4 ; cbl1 cgr1 cred1 cbl1 |
---|
553 | |
---|
554 | pand mm3, davemask ; 0 cgr1 cred1 0 |
---|
555 | |
---|
556 | paddsw mm7, mm0 ; r1 b0 g0 r0 |
---|
557 | |
---|
558 | psllq mm3, 16 ; cgr1 cred1 0 0 |
---|
559 | |
---|
560 | movq mm6, mm1 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1 |
---|
561 | |
---|
562 | por mm2, mm3 ; cgr1 cred1 cbl0 cgr0 |
---|
563 | |
---|
564 | punpcklbw mm6, empty ; Y4 Y4 Y1 Y1 |
---|
565 | |
---|
566 | movd mm3, [eax] ; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2 |
---|
567 | |
---|
568 | paddsw mm6, mm2 ; g4 r4 b1 g1 |
---|
569 | |
---|
570 | packuswb mm7, mm6 ; g4 r4 b1 g1 r1 b0 g0 r0 |
---|
571 | |
---|
572 | movq mm6, mm3 ; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2 |
---|
573 | |
---|
574 | punpcklbw mm3, mm3 ; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2 |
---|
575 | |
---|
576 | movq [edi], mm7 ; move to memory g4 r4 b1 g1 r1 b0 g0 r0 |
---|
577 | |
---|
578 | movq mm5, mm3 ; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2 |
---|
579 | |
---|
580 | punpcklwd mm3, mm6 ; X X X X Y3 Y2 Y2 Y2 |
---|
581 | |
---|
582 | punpcklbw mm3, empty ; Y3 Y2 Y2 Y2 |
---|
583 | |
---|
584 | psrlq mm5, 16 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3 |
---|
585 | |
---|
586 | paddsw mm3, mm0 ; r3 b2 g2 r2 |
---|
587 | |
---|
588 | movq mm6, mm5 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3 |
---|
589 | |
---|
590 | movq mm0, mm1 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1 |
---|
591 | |
---|
592 | punpckldq mm6, mm6 ; X X X X Y6 Y6 Y3 Y3 |
---|
593 | |
---|
594 | punpcklbw mm6, empty ; Y6 Y6 Y3 Y3 |
---|
595 | |
---|
596 | psrlq mm1, 24 ; 0 0 0 0 0 Y5 Y5 Y4 |
---|
597 | |
---|
598 | paddsw mm6, mm2 ; g6 r6 b3 g3 |
---|
599 | |
---|
600 | packuswb mm3, mm6 ; g6 r6 b3 g3 r3 b2 g2 r2 |
---|
601 | |
---|
602 | movq mm2, mm5 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3 |
---|
603 | |
---|
604 | psrlq mm0, 32 ; 0 0 0 0 0 0 Y5 Y5 |
---|
605 | |
---|
606 | movq [edx], mm3 ; move to memory g6 r6 b3 g3 r3 b2 g2 r2 |
---|
607 | |
---|
608 | punpcklwd mm1, mm0 ; X X X X Y5 Y5 Y5 Y4 |
---|
609 | |
---|
610 | psrlq mm5, 24 ; 0 0 0 0 0 Y7 Y7 Y6 |
---|
611 | |
---|
612 | movd mm0, [ebx] ; Cr9 Cr8.....Cr3 Cr2 |
---|
613 | |
---|
614 | psrlq mm2, 32 ; 0 0 0 0 0 0 Y7 Y7 |
---|
615 | |
---|
616 | psrlq mm0, 16 |
---|
617 | |
---|
618 | punpcklbw mm1, empty ; Y5 Y5 Y5 Y4 |
---|
619 | |
---|
620 | punpcklwd mm5, mm2 ; X X X X Y7 Y7 Y7 Y6 |
---|
621 | |
---|
622 | paddsw mm1, mm4 ; b5 g5 r5 b4 |
---|
623 | |
---|
624 | punpcklbw mm5, empty ; Y7 Y7 Y7 Y6 |
---|
625 | |
---|
626 | pxor mm6, mm6 ; clear mm6 registr |
---|
627 | |
---|
628 | punpcklbw mm0, mm0 ; X X X X Cr3 Cr3 Cr2 Cr2 |
---|
629 | |
---|
630 | paddsw mm5, mm4 ; b7 g7 r7 b6 |
---|
631 | |
---|
632 | punpcklwd mm0, mm0 ; Cr3 Cr3 Cr3 Cr3 Cr2 Cr2 Cr2 Cr2 |
---|
633 | |
---|
634 | movq mm4, mm0 |
---|
635 | |
---|
636 | movd mm3, [ecx] ; Cb9 Cb8...... Cb3 Cb2 |
---|
637 | |
---|
638 | punpcklbw mm0, mm6 ; Cr2 Cr2 Cr2 Cr2 |
---|
639 | |
---|
640 | psrlq mm3, 16 |
---|
641 | |
---|
642 | psubsw mm0, const128 ; Cr2 - 128:Cr2-128:Cr2-128:Cr2 -128 |
---|
643 | |
---|
644 | punpcklbw mm3, mm3 ; X X X X Cb3 Cb3 Cb2 Cb2 |
---|
645 | |
---|
646 | psllw mm0, 2 ; left shift by 2 bits |
---|
647 | |
---|
648 | paddsw mm0, const05 ; add (one_half/fix(x)) << 2 |
---|
649 | |
---|
650 | punpcklwd mm3, mm3 ; Cb3 Cb3 Cb3 Cb3 Cb2 Cb2 Cb2 Cb2 |
---|
651 | |
---|
652 | movq mm7, mm3 |
---|
653 | |
---|
654 | pmulhw mm0, const1 ; multiply by (fix(x) >> 1) |
---|
655 | |
---|
656 | punpcklbw mm3, mm6 ; Cb2 Cb2 Cb2 Cb2 |
---|
657 | |
---|
658 | psubsw mm3, const128 ; Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128 |
---|
659 | |
---|
660 | punpckhbw mm4, mm6 ; Cr3 Cr3 Cr3 Cr3 |
---|
661 | |
---|
662 | psllw mm3, 2 ; left shift by 2 bits |
---|
663 | |
---|
664 | paddsw mm3, const15 ; add (one_half/fix(x)) << 2 |
---|
665 | |
---|
666 | punpckhbw mm7, mm6 ; Cb3 Cb3 Cb3 Cb3 |
---|
667 | |
---|
668 | pmulhw mm3, const2 ; multiply by (fix(x) >> 1) |
---|
669 | |
---|
670 | psubsw mm7, const128 ; Cb3 - 128:Cb3-128:Cb3-128:Cb3 -128 |
---|
671 | |
---|
672 | paddsw mm0, mm3 ; cred2 cbl2 cgr2 cred2 |
---|
673 | |
---|
674 | psllw mm7, 2 ; left shift by 2 bits |
---|
675 | |
---|
676 | psubsw mm4, const128 ; Cr3 - 128:Cr3-128:Cr3-128:Cr3 -128 |
---|
677 | |
---|
678 | movd mm3, [esi+4] ; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8 |
---|
679 | |
---|
680 | psllw mm4, 2 ; left shift by 2 bits |
---|
681 | |
---|
682 | paddsw mm7, const55 ; add (one_half/fix(x)) << 2 |
---|
683 | |
---|
684 | movq mm6, mm3 ; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8 |
---|
685 | |
---|
686 | movq mm2, mm0 |
---|
687 | |
---|
688 | pand mm2, davemask |
---|
689 | |
---|
690 | punpcklbw mm3, mm3 ; Y13 Y13 Y12 Y12 Y9 Y9 Y8 Y8 |
---|
691 | |
---|
692 | psrlq mm2, 16 |
---|
693 | |
---|
694 | paddsw mm4, const45 ; add (one_half/fix(x)) << 2 |
---|
695 | |
---|
696 | punpcklwd mm3, mm6 ; X X X X Y9 Y8 Y8 Y8 |
---|
697 | |
---|
698 | pmulhw mm4, const5 ; multiply by (fix(x) >> 1) |
---|
699 | |
---|
700 | pmulhw mm7, const6 ; multiply by (fix(x) >> 1) |
---|
701 | |
---|
702 | punpcklbw mm3, empty ; Y9 Y8 Y8 Y8 |
---|
703 | |
---|
704 | paddsw mm4, mm7 ; cbl3 cgr3 cred3 cbl3 |
---|
705 | |
---|
706 | paddsw mm3, mm0 ; r9 b8 g8 r8 |
---|
707 | |
---|
708 | movq mm7, mm4 |
---|
709 | |
---|
710 | packuswb mm1, mm3 ; r9 b8 g8 r8 b5 g5 r5 b4 |
---|
711 | |
---|
712 | movd mm3, [eax+4] ; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10 |
---|
713 | |
---|
714 | pand mm7, davemask |
---|
715 | |
---|
716 | psrlq mm6, 8 ; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9 |
---|
717 | |
---|
718 | psllq mm7, 16 |
---|
719 | |
---|
720 | movq [edi+8], mm1 ; move to memory r9 b8 g8 r8 b5 g5 r5 b4 |
---|
721 | |
---|
722 | por mm2, mm7 |
---|
723 | |
---|
724 | movq mm7, mm3 ; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10 |
---|
725 | |
---|
726 | punpcklbw mm3, mm3 ; X X X X Y11 Y11 Y10 Y10 |
---|
727 | |
---|
728 | pxor mm1, mm1 |
---|
729 | |
---|
730 | punpcklwd mm3, mm7 ; X X X X Y11 Y10 Y10 Y10 |
---|
731 | |
---|
732 | punpcklbw mm3, mm1 ; Y11 Y10 Y10 Y10 |
---|
733 | |
---|
734 | psrlq mm7, 8 ; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11 |
---|
735 | |
---|
736 | paddsw mm3, mm0 ; r11 b10 g10 r10 |
---|
737 | |
---|
738 | movq mm0, mm7 ; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11 |
---|
739 | |
---|
740 | packuswb mm5, mm3 ; r11 b10 g10 r10 b7 g7 r7 b6 |
---|
741 | |
---|
742 | punpcklbw mm7, mm7 ; X X X X Y14 Y14 Y11 Y11 |
---|
743 | |
---|
744 | movq [edx+8], mm5 ; move to memory r11 b10 g10 r10 b7 g7 r7 b6 |
---|
745 | |
---|
746 | movq mm3, mm6 ; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9 |
---|
747 | |
---|
748 | punpcklbw mm6, mm6 ; X X X X Y12 Y12 Y9 Y9 |
---|
749 | |
---|
750 | punpcklbw mm7, mm1 ; Y14 Y14 Y11 Y11 |
---|
751 | |
---|
752 | punpcklbw mm6, mm1 ; Y12 Y12 Y9 Y9 |
---|
753 | |
---|
754 | paddsw mm7, mm2 ; g14 r14 b11 g11 |
---|
755 | |
---|
756 | paddsw mm6, mm2 ; g12 r12 b9 g9 |
---|
757 | |
---|
758 | psrlq mm3, 8 ; 0 0 Y21 Y20 Y17 Y16 Y13 Y12 |
---|
759 | |
---|
760 | movq mm1, mm3 ; 0 0 Y21 Y20 Y17 Y16 Y13 Y12 |
---|
761 | |
---|
762 | punpcklbw mm3, mm3 ; X X X X Y13 Y13 Y12 Y12 |
---|
763 | |
---|
764 | add esi, 8 |
---|
765 | |
---|
766 | psrlq mm3, 16 ; X X X X X X Y13 Y13 modified on 09/24 |
---|
767 | |
---|
768 | punpcklwd mm1, mm3 ; X X X X Y13 Y13 Y13 Y12 |
---|
769 | |
---|
770 | add eax, 8 |
---|
771 | |
---|
772 | psrlq mm0, 8 ; 0 0 Y23 Y22 Y19 Y18 Y15 Y14 |
---|
773 | |
---|
774 | punpcklbw mm1, empty ; Y13 Y13 Y13 Y12 |
---|
775 | |
---|
776 | movq mm5, mm0 ; 0 0 Y23 Y22 Y19 Y18 Y15 Y14 |
---|
777 | |
---|
778 | punpcklbw mm0, mm0 ; X X X X Y15 Y15 Y14 Y14 |
---|
779 | |
---|
780 | paddsw mm1, mm4 ; b13 g13 r13 b12 |
---|
781 | |
---|
782 | psrlq mm0, 16 ; X X X X X X Y15 Y15 |
---|
783 | |
---|
784 | add edi, 24 |
---|
785 | |
---|
786 | punpcklwd mm5, mm0 ; X X X X Y15 Y15 Y15 Y14 |
---|
787 | |
---|
788 | packuswb mm6, mm1 ; b13 g13 r13 b12 g12 r12 b9 g9 |
---|
789 | |
---|
790 | add edx, 24 |
---|
791 | |
---|
792 | punpcklbw mm5, empty ; Y15 Y15 Y15 Y14 |
---|
793 | |
---|
794 | add ebx, 4 |
---|
795 | |
---|
796 | paddsw mm5, mm4 ; b15 g15 r15 b14 |
---|
797 | |
---|
798 | movq [edi-8], mm6 ; move to memory b13 g13 r13 b12 g12 r12 b9 g9 |
---|
799 | |
---|
800 | packuswb mm7, mm5 ; b15 g15 r15 b14 g14 r14 b11 g11 |
---|
801 | |
---|
802 | add ecx, 4 |
---|
803 | |
---|
804 | movq [edx-8], mm7 ; move to memory b15 g15 r15 b14 g14 r14 b11 g11 |
---|
805 | |
---|
806 | dec cols_asm |
---|
807 | |
---|
808 | jnz do_next16 |
---|
809 | |
---|
810 | EMMS |
---|
811 | |
---|
812 | } |
---|
813 | |
---|
814 | |
---|
815 | inptr1 += (cols_asm_copy<<2); |
---|
816 | |
---|
817 | inptr2 += (cols_asm_copy<<2); |
---|
818 | |
---|
819 | inptr00 += (cols_asm_copy<<3); |
---|
820 | |
---|
821 | inptr01 += (cols_asm_copy<<3); |
---|
822 | |
---|
823 | outptr0 += cols_asm_copy*24; |
---|
824 | |
---|
825 | outptr1 += cols_asm_copy*24; |
---|
826 | |
---|
827 | //for (col = cinfo->output_width >> 1; col > 0; col--) { |
---|
828 | /* Do the chroma part of the calculation */ |
---|
829 | /*cb = GETJSAMPLE(*inptr1++); |
---|
830 | cr = GETJSAMPLE(*inptr2++); |
---|
831 | cred = Crrtab[cr]; |
---|
832 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
833 | cblue = Cbbtab[cb];*/ |
---|
834 | /* Fetch 4 Y values and emit 4 pixels */ |
---|
835 | /*y = GETJSAMPLE(*inptr00++); |
---|
836 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
837 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
838 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
839 | outptr0 += RGB_PIXELSIZE; |
---|
840 | y = GETJSAMPLE(*inptr00++); |
---|
841 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
842 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
843 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
844 | outptr0 += RGB_PIXELSIZE; |
---|
845 | y = GETJSAMPLE(*inptr01++); |
---|
846 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
847 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
848 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
849 | outptr1 += RGB_PIXELSIZE; |
---|
850 | y = GETJSAMPLE(*inptr01++); |
---|
851 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
852 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
853 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
854 | outptr1 += RGB_PIXELSIZE; |
---|
855 | } */ |
---|
856 | |
---|
857 | |
---|
858 | for (col = diff >> 1; col > 0; col--) { |
---|
859 | /* Do the chroma part of the calculation */ |
---|
860 | cb = GETJSAMPLE(*inptr1++); |
---|
861 | cr = GETJSAMPLE(*inptr2++); |
---|
862 | cred = Crrtab[cr]; |
---|
863 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
864 | cblue = Cbbtab[cb]; |
---|
865 | /* Fetch 4 Y values and emit 4 pixels */ |
---|
866 | y = GETJSAMPLE(*inptr00++); |
---|
867 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
868 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
869 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
870 | outptr0 += RGB_PIXELSIZE; |
---|
871 | y = GETJSAMPLE(*inptr00++); |
---|
872 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
873 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
874 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
875 | outptr0 += RGB_PIXELSIZE; |
---|
876 | y = GETJSAMPLE(*inptr01++); |
---|
877 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
878 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
879 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
880 | outptr1 += RGB_PIXELSIZE; |
---|
881 | y = GETJSAMPLE(*inptr01++); |
---|
882 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
883 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
884 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
885 | outptr1 += RGB_PIXELSIZE; |
---|
886 | } |
---|
887 | |
---|
888 | |
---|
889 | /* If image width is odd, do the last output column separately */ |
---|
890 | //if (cinfo->output_width & 1) { |
---|
891 | if (diff & 1) { |
---|
892 | cb = GETJSAMPLE(*inptr1); |
---|
893 | cr = GETJSAMPLE(*inptr2); |
---|
894 | cred = Crrtab[cr]; |
---|
895 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
896 | cblue = Cbbtab[cb]; |
---|
897 | y = GETJSAMPLE(*inptr00); |
---|
898 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
899 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
900 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
901 | y = GETJSAMPLE(*inptr01); |
---|
902 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
903 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
904 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
905 | } |
---|
906 | } |
---|
907 | #else |
---|
908 | |
---|
909 | |
---|
910 | METHODDEF(void) |
---|
911 | h2v2_merged_upsample (j_decompress_ptr cinfo, |
---|
912 | JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, |
---|
913 | JSAMPARRAY output_buf) |
---|
914 | { |
---|
915 | my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; |
---|
916 | register int y, cred, cgreen, cblue; |
---|
917 | int cb, cr; |
---|
918 | register JSAMPROW outptr0, outptr1; |
---|
919 | JSAMPROW inptr00, inptr01, inptr1, inptr2; |
---|
920 | JDIMENSION col; |
---|
921 | /* copy these pointers into registers if possible */ |
---|
922 | register JSAMPLE * range_limit = cinfo->sample_range_limit; |
---|
923 | int * Crrtab = upsample->Cr_r_tab; |
---|
924 | int * Cbbtab = upsample->Cb_b_tab; |
---|
925 | INT32 * Crgtab = upsample->Cr_g_tab; |
---|
926 | INT32 * Cbgtab = upsample->Cb_g_tab; |
---|
927 | SHIFT_TEMPS |
---|
928 | |
---|
929 | inptr00 = input_buf[0][in_row_group_ctr*2]; |
---|
930 | inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; |
---|
931 | inptr1 = input_buf[1][in_row_group_ctr]; |
---|
932 | inptr2 = input_buf[2][in_row_group_ctr]; |
---|
933 | outptr0 = output_buf[0]; |
---|
934 | outptr1 = output_buf[1]; |
---|
935 | /* Loop for each group of output pixels */ |
---|
936 | for (col = cinfo->output_width >> 1; col > 0; col--) { |
---|
937 | /* Do the chroma part of the calculation */ |
---|
938 | cb = GETJSAMPLE(*inptr1++); |
---|
939 | cr = GETJSAMPLE(*inptr2++); |
---|
940 | cred = Crrtab[cr]; |
---|
941 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
942 | cblue = Cbbtab[cb]; |
---|
943 | /* Fetch 4 Y values and emit 4 pixels */ |
---|
944 | y = GETJSAMPLE(*inptr00++); |
---|
945 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
946 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
947 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
948 | outptr0 += RGB_PIXELSIZE; |
---|
949 | y = GETJSAMPLE(*inptr00++); |
---|
950 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
951 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
952 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
953 | outptr0 += RGB_PIXELSIZE; |
---|
954 | y = GETJSAMPLE(*inptr01++); |
---|
955 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
956 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
957 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
958 | outptr1 += RGB_PIXELSIZE; |
---|
959 | y = GETJSAMPLE(*inptr01++); |
---|
960 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
961 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
962 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
963 | outptr1 += RGB_PIXELSIZE; |
---|
964 | } |
---|
965 | /* If image width is odd, do the last output column separately */ |
---|
966 | if (cinfo->output_width & 1) { |
---|
967 | cb = GETJSAMPLE(*inptr1); |
---|
968 | cr = GETJSAMPLE(*inptr2); |
---|
969 | cred = Crrtab[cr]; |
---|
970 | cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); |
---|
971 | cblue = Cbbtab[cb]; |
---|
972 | y = GETJSAMPLE(*inptr00); |
---|
973 | outptr0[RGB_RED] = range_limit[y + cred]; |
---|
974 | outptr0[RGB_GREEN] = range_limit[y + cgreen]; |
---|
975 | outptr0[RGB_BLUE] = range_limit[y + cblue]; |
---|
976 | y = GETJSAMPLE(*inptr01); |
---|
977 | outptr1[RGB_RED] = range_limit[y + cred]; |
---|
978 | outptr1[RGB_GREEN] = range_limit[y + cgreen]; |
---|
979 | outptr1[RGB_BLUE] = range_limit[y + cblue]; |
---|
980 | } |
---|
981 | } |
---|
982 | #endif |
---|
983 | |
---|
984 | |
---|
985 | /* |
---|
986 | * Module initialization routine for merged upsampling/color conversion. |
---|
987 | * |
---|
988 | * NB: this is called under the conditions determined by use_merged_upsample() |
---|
989 | * in jdmaster.c. That routine MUST correspond to the actual capabilities |
---|
990 | * of this module; no safety checks are made here. |
---|
991 | */ |
---|
992 | |
---|
993 | GLOBAL(void) |
---|
994 | jinit_merged_upsampler (j_decompress_ptr cinfo) |
---|
995 | { |
---|
996 | my_upsample_ptr upsample; |
---|
997 | |
---|
998 | upsample = (my_upsample_ptr) |
---|
999 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
---|
1000 | SIZEOF(my_upsampler)); |
---|
1001 | cinfo->upsample = (struct jpeg_upsampler *) upsample; |
---|
1002 | upsample->pub.start_pass = start_pass_merged_upsample; |
---|
1003 | upsample->pub.need_context_rows = FALSE; |
---|
1004 | |
---|
1005 | upsample->out_row_width = cinfo->output_width * cinfo->out_color_components; |
---|
1006 | |
---|
1007 | if (cinfo->max_v_samp_factor == 2) { |
---|
1008 | upsample->pub.upsample = merged_2v_upsample; |
---|
1009 | upsample->upmethod = h2v2_merged_upsample; |
---|
1010 | /* Allocate a spare row buffer */ |
---|
1011 | upsample->spare_row = (JSAMPROW) |
---|
1012 | (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
---|
1013 | (size_t) (upsample->out_row_width * SIZEOF(JSAMPLE))); |
---|
1014 | } else { |
---|
1015 | upsample->pub.upsample = merged_1v_upsample; |
---|
1016 | upsample->upmethod = h2v1_merged_upsample; |
---|
1017 | /* No spare row needed */ |
---|
1018 | upsample->spare_row = NULL; |
---|
1019 | } |
---|
1020 | |
---|
1021 | build_ycc_rgb_table(cinfo); |
---|
1022 | } |
---|
1023 | |
---|
1024 | #endif /* UPSAMPLE_MERGING_SUPPORTED */ |
---|