00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavcore/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "lpc.h"
00040 #include "ac3dec.h"
00041 #include "vorbis.h"
00042 #include "png.h"
00043
00044 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/*
 * 512-entry lookup table.  Users in this file index it through
 * "ff_squareTbl + 256" with byte values or byte differences.  It starts out
 * all zero here; presumably it is filled with squares by initialisation
 * code elsewhere -- TODO confirm.
 */
uint32_t ff_squareTbl[512] = { 0 };
00046
00047
/*
 * 0x7f and 0x80 replicated into every byte of an unsigned long
 * (~0UL / 255 is the word 0x01 0x01 ... 0x01).
 */
#define pb_7f ((~0UL / 255) * 0x7f)
#define pb_80 ((~0UL / 255) * 0x80)
00050
/*
 * Zigzag scan order for an 8x8 block: a permutation of 0..63 in which
 * entry k is the raster index (row * 8 + column) visited at scan step k.
 */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00061
00062
00063
/*
 * Alternative scan order for an 8x8 block (a permutation of 0..63).
 * Judging by the name it is meant for the 2-4-8 IDCT -- TODO confirm
 * against the users of this table.
 */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00074
00075
/*
 * 64-entry uint16_t table declared through DECLARE_ALIGNED with an argument
 * of 16.  Not initialised here; judging by the name it holds an inverse of
 * the zigzag scan and is filled in elsewhere -- TODO confirm.
 */
00076 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00077
/*
 * Alternate scan order for an 8x8 block (a permutation of 0..63) that
 * visits coefficients predominantly along rows.
 */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00088
/*
 * Alternate scan order for an 8x8 block (a permutation of 0..63) that
 * visits coefficients predominantly along columns.
 */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00099
00100
/*
 * Coefficient permutation (a permutation of 0x00..0x3F).  Judging by the
 * name it is the input order expected by the MMX "simple" IDCT -- TODO
 * confirm against the code that selects it.
 */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00111
/*
 * Permutation of the eight positions within a row (interleaves 0..3 with
 * 4..7).  Judging by the name it is used by an SSE2 IDCT -- TODO confirm.
 */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5, 2, 6, 3, 7
};
00113
00114 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00115 int i;
00116 int end;
00117
00118 st->scantable= src_scantable;
00119
00120 for(i=0; i<64; i++){
00121 int j;
00122 j = src_scantable[i];
00123 st->permutated[i] = permutation[j];
00124 #if ARCH_PPC
00125 st->inverse[j] = i;
00126 #endif
00127 }
00128
00129 end=-1;
00130 for(i=0; i<64; i++){
00131 int j;
00132 j = st->permutated[i];
00133 if(j>end) end=j;
00134 st->raster_end[i]= end;
00135 }
00136 }
00137
/**
 * Sum of all samples of a 16x16 block.
 *
 * @param pix        top-left sample of the block
 * @param line_size  distance in bytes between the starts of two rows
 * @return the sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00159
00160 static int pix_norm1_c(uint8_t * pix, int line_size)
00161 {
00162 int s, i, j;
00163 uint32_t *sq = ff_squareTbl + 256;
00164
00165 s = 0;
00166 for (i = 0; i < 16; i++) {
00167 for (j = 0; j < 16; j += 8) {
00168 #if 0
00169 s += sq[pix[0]];
00170 s += sq[pix[1]];
00171 s += sq[pix[2]];
00172 s += sq[pix[3]];
00173 s += sq[pix[4]];
00174 s += sq[pix[5]];
00175 s += sq[pix[6]];
00176 s += sq[pix[7]];
00177 #else
00178 #if LONG_MAX > 2147483647
00179 register uint64_t x=*(uint64_t*)pix;
00180 s += sq[x&0xff];
00181 s += sq[(x>>8)&0xff];
00182 s += sq[(x>>16)&0xff];
00183 s += sq[(x>>24)&0xff];
00184 s += sq[(x>>32)&0xff];
00185 s += sq[(x>>40)&0xff];
00186 s += sq[(x>>48)&0xff];
00187 s += sq[(x>>56)&0xff];
00188 #else
00189 register uint32_t x=*(uint32_t*)pix;
00190 s += sq[x&0xff];
00191 s += sq[(x>>8)&0xff];
00192 s += sq[(x>>16)&0xff];
00193 s += sq[(x>>24)&0xff];
00194 x=*(uint32_t*)(pix+4);
00195 s += sq[x&0xff];
00196 s += sq[(x>>8)&0xff];
00197 s += sq[(x>>16)&0xff];
00198 s += sq[(x>>24)&0xff];
00199 #endif
00200 #endif
00201 pix += 8;
00202 }
00203 pix += line_size - 16;
00204 }
00205 return s;
00206 }
00207
/**
 * Byte-swap w 32-bit words from src into dst using av_bswap32().
 *
 * @param dst  destination, at least w words
 * @param src  source, at least w words
 * @param w    number of 32-bit words to convert
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int pos = 0;

    /* whole groups of eight words */
    while (pos + 8 <= w) {
        int k;

        for (k = 0; k < 8; k++)
            dst[pos + k] = av_bswap32(src[pos + k]);
        pos += 8;
    }

    /* remaining words */
    while (pos < w) {
        dst[pos] = av_bswap32(src[pos]);
        pos++;
    }
}
00225
00226 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00227 {
00228 int s, i;
00229 uint32_t *sq = ff_squareTbl + 256;
00230
00231 s = 0;
00232 for (i = 0; i < h; i++) {
00233 s += sq[pix1[0] - pix2[0]];
00234 s += sq[pix1[1] - pix2[1]];
00235 s += sq[pix1[2] - pix2[2]];
00236 s += sq[pix1[3] - pix2[3]];
00237 pix1 += line_size;
00238 pix2 += line_size;
00239 }
00240 return s;
00241 }
00242
00243 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00244 {
00245 int s, i;
00246 uint32_t *sq = ff_squareTbl + 256;
00247
00248 s = 0;
00249 for (i = 0; i < h; i++) {
00250 s += sq[pix1[0] - pix2[0]];
00251 s += sq[pix1[1] - pix2[1]];
00252 s += sq[pix1[2] - pix2[2]];
00253 s += sq[pix1[3] - pix2[3]];
00254 s += sq[pix1[4] - pix2[4]];
00255 s += sq[pix1[5] - pix2[5]];
00256 s += sq[pix1[6] - pix2[6]];
00257 s += sq[pix1[7] - pix2[7]];
00258 pix1 += line_size;
00259 pix2 += line_size;
00260 }
00261 return s;
00262 }
00263
00264 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00265 {
00266 int s, i;
00267 uint32_t *sq = ff_squareTbl + 256;
00268
00269 s = 0;
00270 for (i = 0; i < h; i++) {
00271 s += sq[pix1[ 0] - pix2[ 0]];
00272 s += sq[pix1[ 1] - pix2[ 1]];
00273 s += sq[pix1[ 2] - pix2[ 2]];
00274 s += sq[pix1[ 3] - pix2[ 3]];
00275 s += sq[pix1[ 4] - pix2[ 4]];
00276 s += sq[pix1[ 5] - pix2[ 5]];
00277 s += sq[pix1[ 6] - pix2[ 6]];
00278 s += sq[pix1[ 7] - pix2[ 7]];
00279 s += sq[pix1[ 8] - pix2[ 8]];
00280 s += sq[pix1[ 9] - pix2[ 9]];
00281 s += sq[pix1[10] - pix2[10]];
00282 s += sq[pix1[11] - pix2[11]];
00283 s += sq[pix1[12] - pix2[12]];
00284 s += sq[pix1[13] - pix2[13]];
00285 s += sq[pix1[14] - pix2[14]];
00286 s += sq[pix1[15] - pix2[15]];
00287
00288 pix1 += line_size;
00289 pix2 += line_size;
00290 }
00291 return s;
00292 }
00293
00294
00295
/**
 * Extend a picture by w samples on every side by replicating its border.
 *
 * The caller must provide w writable rows above and below the picture and
 * w writable bytes to the left and right of every row.
 *
 * @param buf     top-left sample of the picture
 * @param wrap    distance in bytes between the starts of two rows
 * @param width   picture width in samples
 * @param height  picture height in rows
 * @param w       border size to generate
 */
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int n;

    /* top and bottom: repeat the first and the last picture row */
    for (n = 0; n < w; n++) {
        const int off = (n + 1) * wrap;

        memcpy(buf - off, buf, width);
        memcpy(bottom + off, bottom, width);
    }

    /* left and right: repeat the first and the last sample of each row */
    row = buf;
    for (n = 0; n < height; n++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* corners: repeat the four corner samples */
    for (n = 0; n < w; n++) {
        uint8_t *const above = buf - (n + 1) * wrap;
        uint8_t *const below = bottom + (n + 1) * wrap;

        memset(above - w, buf[0], w);
        memset(above + width, buf[width - 1], w);
        memset(below - w, bottom[0], w);
        memset(below + width, bottom[width - 1], w);
    }
}
00322
/**
 * Copy a block_w x block_h block into buf, replicating the nearest picture
 * sample wherever the block reaches outside the w x h source picture.
 *
 * @param buf       destination block; rows are linesize bytes apart
 * @param src       address of the block's top-left sample in the source
 *                  (position src_x, src_y); may lie outside the picture
 * @param linesize  row stride in bytes of both buf and src
 * @param block_w   block width
 * @param block_h   block height
 * @param src_x     x position of the block relative to the picture
 * @param src_y     y position of the block relative to the picture
 * @param w         picture width
 * @param h         picture height
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h){
    int col, row;
    int top, left, bottom, right;

    /* pull a block lying completely outside back until it overlaps one row */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    /* ... and one column */
    if (src_x >= w) {
        src  += (w - 1 - src_x);
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += (1 - block_w - src_x);
        src_x = 1 - block_w;
    }

    /* part of the block that lies inside the picture */
    top    = FFMAX(0, -src_y);
    left   = FFMAX(0, -src_x);
    bottom = FFMIN(block_h, h - src_y);
    right  = FFMIN(block_w, w - src_x);

    /* copy the samples that really exist */
    for (row = top; row < bottom; row++) {
        const int off = row * linesize;

        for (col = left; col < right; col++)
            buf[col + off] = src[col + off];
    }

    /* rows above: repeat the first copied row */
    for (row = 0; row < top; row++) {
        for (col = left; col < right; col++)
            buf[col + row * linesize] = buf[col + top * linesize];
    }

    /* rows below: repeat the last copied row */
    for (row = bottom; row < block_h; row++) {
        for (col = left; col < right; col++)
            buf[col + row * linesize] = buf[col + (bottom - 1) * linesize];
    }

    /* columns to the left and right, for every row of the block */
    for (row = 0; row < block_h; row++) {
        const int off = row * linesize;

        for (col = 0; col < left; col++)
            buf[col + off] = buf[left + off];

        for (col = right; col < block_w; col++)
            buf[col + off] = buf[right - 1 + off];
    }
}
00393
00394 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00395 {
00396 int i;
00397
00398
00399 for(i=0;i<8;i++) {
00400 block[0] = pixels[0];
00401 block[1] = pixels[1];
00402 block[2] = pixels[2];
00403 block[3] = pixels[3];
00404 block[4] = pixels[4];
00405 block[5] = pixels[5];
00406 block[6] = pixels[6];
00407 block[7] = pixels[7];
00408 pixels += line_size;
00409 block += 8;
00410 }
00411 }
00412
00413 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00414 const uint8_t *s2, int stride){
00415 int i;
00416
00417
00418 for(i=0;i<8;i++) {
00419 block[0] = s1[0] - s2[0];
00420 block[1] = s1[1] - s2[1];
00421 block[2] = s1[2] - s2[2];
00422 block[3] = s1[3] - s2[3];
00423 block[4] = s1[4] - s2[4];
00424 block[5] = s1[5] - s2[5];
00425 block[6] = s1[6] - s2[6];
00426 block[7] = s1[7] - s2[7];
00427 s1 += stride;
00428 s2 += stride;
00429 block += 8;
00430 }
00431 }
00432
00433
00434 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00435 int line_size)
00436 {
00437 int i;
00438 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00439
00440
00441 for(i=0;i<8;i++) {
00442 pixels[0] = cm[block[0]];
00443 pixels[1] = cm[block[1]];
00444 pixels[2] = cm[block[2]];
00445 pixels[3] = cm[block[3]];
00446 pixels[4] = cm[block[4]];
00447 pixels[5] = cm[block[5]];
00448 pixels[6] = cm[block[6]];
00449 pixels[7] = cm[block[7]];
00450
00451 pixels += line_size;
00452 block += 8;
00453 }
00454 }
00455
00456 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00457 int line_size)
00458 {
00459 int i;
00460 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00461
00462
00463 for(i=0;i<4;i++) {
00464 pixels[0] = cm[block[0]];
00465 pixels[1] = cm[block[1]];
00466 pixels[2] = cm[block[2]];
00467 pixels[3] = cm[block[3]];
00468
00469 pixels += line_size;
00470 block += 8;
00471 }
00472 }
00473
00474 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00475 int line_size)
00476 {
00477 int i;
00478 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00479
00480
00481 for(i=0;i<2;i++) {
00482 pixels[0] = cm[block[0]];
00483 pixels[1] = cm[block[1]];
00484
00485 pixels += line_size;
00486 block += 8;
00487 }
00488 }
00489
00490 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00491 uint8_t *restrict pixels,
00492 int line_size)
00493 {
00494 int i, j;
00495
00496 for (i = 0; i < 8; i++) {
00497 for (j = 0; j < 8; j++) {
00498 if (*block < -128)
00499 *pixels = 0;
00500 else if (*block > 127)
00501 *pixels = 255;
00502 else
00503 *pixels = (uint8_t)(*block + 128);
00504 block++;
00505 pixels++;
00506 }
00507 pixels += (line_size - 8);
00508 }
00509 }
00510
00511 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00512 int line_size)
00513 {
00514 int i;
00515
00516
00517 for(i=0;i<8;i++) {
00518 pixels[0] = block[0];
00519 pixels[1] = block[1];
00520 pixels[2] = block[2];
00521 pixels[3] = block[3];
00522 pixels[4] = block[4];
00523 pixels[5] = block[5];
00524 pixels[6] = block[6];
00525 pixels[7] = block[7];
00526
00527 pixels += line_size;
00528 block += 8;
00529 }
00530 }
00531
00532 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00533 int line_size)
00534 {
00535 int i;
00536 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00537
00538
00539 for(i=0;i<8;i++) {
00540 pixels[0] = cm[pixels[0] + block[0]];
00541 pixels[1] = cm[pixels[1] + block[1]];
00542 pixels[2] = cm[pixels[2] + block[2]];
00543 pixels[3] = cm[pixels[3] + block[3]];
00544 pixels[4] = cm[pixels[4] + block[4]];
00545 pixels[5] = cm[pixels[5] + block[5]];
00546 pixels[6] = cm[pixels[6] + block[6]];
00547 pixels[7] = cm[pixels[7] + block[7]];
00548 pixels += line_size;
00549 block += 8;
00550 }
00551 }
00552
00553 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00554 int line_size)
00555 {
00556 int i;
00557 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00558
00559
00560 for(i=0;i<4;i++) {
00561 pixels[0] = cm[pixels[0] + block[0]];
00562 pixels[1] = cm[pixels[1] + block[1]];
00563 pixels[2] = cm[pixels[2] + block[2]];
00564 pixels[3] = cm[pixels[3] + block[3]];
00565 pixels += line_size;
00566 block += 8;
00567 }
00568 }
00569
00570 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00571 int line_size)
00572 {
00573 int i;
00574 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00575
00576
00577 for(i=0;i<2;i++) {
00578 pixels[0] = cm[pixels[0] + block[0]];
00579 pixels[1] = cm[pixels[1] + block[1]];
00580 pixels += line_size;
00581 block += 8;
00582 }
00583 }
00584
00585 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00586 {
00587 int i;
00588 for(i=0;i<8;i++) {
00589 pixels[0] += block[0];
00590 pixels[1] += block[1];
00591 pixels[2] += block[2];
00592 pixels[3] += block[3];
00593 pixels[4] += block[4];
00594 pixels[5] += block[5];
00595 pixels[6] += block[6];
00596 pixels[7] += block[7];
00597 pixels += line_size;
00598 block += 8;
00599 }
00600 }
00601
00602 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00603 {
00604 int i;
00605 for(i=0;i<4;i++) {
00606 pixels[0] += block[0];
00607 pixels[1] += block[1];
00608 pixels[2] += block[2];
00609 pixels[3] += block[3];
00610 pixels += line_size;
00611 block += 4;
00612 }
00613 }
00614
00615 static int sum_abs_dctelem_c(DCTELEM *block)
00616 {
00617 int sum=0, i;
00618 for(i=0; i<64; i++)
00619 sum+= FFABS(block[i]);
00620 return sum;
00621 }
00622
/**
 * Set a block 16 bytes wide and h rows high to a constant value.
 *
 * @param block      top-left byte of the block
 * @param value      byte to store
 * @param line_size  row stride in bytes
 * @param h          number of rows
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 16);
        row += line_size;
    }
}
00632
/**
 * Set a block 8 bytes wide and h rows high to a constant value.
 *
 * @param block      top-left byte of the block
 * @param value      byte to store
 * @param line_size  row stride in bytes
 * @param h          number of rows
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int remaining;

    for (remaining = h; remaining > 0; remaining--) {
        memset(row, value, 8);
        row += line_size;
    }
}
00642
/**
 * Upscale an 8x8 block to 16x16 by sample doubling: every source sample is
 * written to a 2x2 group of destination bytes.
 *
 * The destination is written through uint16_t pointers, so dst and
 * linesize must keep every row suitably aligned for 16-bit stores.
 *
 * @param src       64 source samples stored row by row
 * @param dst       top-left byte of the 16x16 destination
 * @param linesize  destination row stride in bytes
 */
static void scale_block_c(const uint8_t src[64], uint8_t *dst, int linesize)
{
    uint16_t *upper = (uint16_t *) dst;              /* rows 0, 2, 4, ... */
    uint16_t *lower = (uint16_t *)(dst + linesize);  /* rows 1, 3, 5, ... */
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            /* same byte in both halves of the 16-bit word */
            const uint16_t pair = src[col] * 0x0101;

            lower[col] = pair;
            upper[col] = pair;
        }
        src   += 8;
        /* linesize 16-bit units == two destination rows */
        upper += linesize;
        lower += linesize;
    }
}
00658
00659 #if 0
00660
/*
 * Disabled variant of PIXOP2 (this branch sits under "#if 0") that handles
 * eight samples at a time through 64-bit words.
 *
 * PIXOP2(OPNAME, OP) expands to a family of static functions taking
 * (block, pixels, line_size, h); each one applies OP(destination word,
 * value) once per row for h rows:
 *   - OPNAME_pixels               : plain copy/combine of AV_RN64(pixels)
 *   - OPNAME_[no_rnd_]pixels_x2_c : per-byte average of pixels and pixels+1
 *   - OPNAME_[no_rnd_]pixels_y2_c : per-byte average of pixels and
 *                                   pixels+line_size
 *   - OPNAME_[no_rnd_]pixels_xy2_c: per-byte average of the four
 *                                   neighbours, two rows per iteration;
 *                                   the rounding variant adds 0x02 per
 *                                   byte, the no_rnd variant 0x01
 * followed by CALL_2X_PIXELS wrappers producing the *_pixels16_* names.
 *
 * NOTE(review): the first function is generated as OPNAME ## _pixels while
 * CALL_2X_PIXELS refers to OPNAME ## _pixels_c; confirm and reconcile the
 * names before enabling this branch.
 */
00661 #define PIXOP2(OPNAME, OP) \
00662 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00663 {\
00664 int i;\
00665 for(i=0; i<h; i++){\
00666 OP(*((uint64_t*)block), AV_RN64(pixels));\
00667 pixels+=line_size;\
00668 block +=line_size;\
00669 }\
00670 }\
00671 \
00672 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00673 {\
00674 int i;\
00675 for(i=0; i<h; i++){\
00676 const uint64_t a= AV_RN64(pixels );\
00677 const uint64_t b= AV_RN64(pixels+1);\
00678 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00679 pixels+=line_size;\
00680 block +=line_size;\
00681 }\
00682 }\
00683 \
00684 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00685 {\
00686 int i;\
00687 for(i=0; i<h; i++){\
00688 const uint64_t a= AV_RN64(pixels );\
00689 const uint64_t b= AV_RN64(pixels+1);\
00690 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00691 pixels+=line_size;\
00692 block +=line_size;\
00693 }\
00694 }\
00695 \
00696 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00697 {\
00698 int i;\
00699 for(i=0; i<h; i++){\
00700 const uint64_t a= AV_RN64(pixels );\
00701 const uint64_t b= AV_RN64(pixels+line_size);\
00702 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00703 pixels+=line_size;\
00704 block +=line_size;\
00705 }\
00706 }\
00707 \
00708 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00709 {\
00710 int i;\
00711 for(i=0; i<h; i++){\
00712 const uint64_t a= AV_RN64(pixels );\
00713 const uint64_t b= AV_RN64(pixels+line_size);\
00714 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00715 pixels+=line_size;\
00716 block +=line_size;\
00717 }\
00718 }\
00719 \
00720 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00721 {\
00722 int i;\
00723 const uint64_t a= AV_RN64(pixels );\
00724 const uint64_t b= AV_RN64(pixels+1);\
00725 uint64_t l0= (a&0x0303030303030303ULL)\
00726 + (b&0x0303030303030303ULL)\
00727 + 0x0202020202020202ULL;\
00728 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00729 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00730 uint64_t l1,h1;\
00731 \
00732 pixels+=line_size;\
00733 for(i=0; i<h; i+=2){\
00734 uint64_t a= AV_RN64(pixels );\
00735 uint64_t b= AV_RN64(pixels+1);\
00736 l1= (a&0x0303030303030303ULL)\
00737 + (b&0x0303030303030303ULL);\
00738 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00739 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00740 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00741 pixels+=line_size;\
00742 block +=line_size;\
00743 a= AV_RN64(pixels );\
00744 b= AV_RN64(pixels+1);\
00745 l0= (a&0x0303030303030303ULL)\
00746 + (b&0x0303030303030303ULL)\
00747 + 0x0202020202020202ULL;\
00748 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00749 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00750 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00751 pixels+=line_size;\
00752 block +=line_size;\
00753 }\
00754 }\
00755 \
00756 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00757 {\
00758 int i;\
00759 const uint64_t a= AV_RN64(pixels );\
00760 const uint64_t b= AV_RN64(pixels+1);\
00761 uint64_t l0= (a&0x0303030303030303ULL)\
00762 + (b&0x0303030303030303ULL)\
00763 + 0x0101010101010101ULL;\
00764 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00765 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00766 uint64_t l1,h1;\
00767 \
00768 pixels+=line_size;\
00769 for(i=0; i<h; i+=2){\
00770 uint64_t a= AV_RN64(pixels );\
00771 uint64_t b= AV_RN64(pixels+1);\
00772 l1= (a&0x0303030303030303ULL)\
00773 + (b&0x0303030303030303ULL);\
00774 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00775 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00776 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00777 pixels+=line_size;\
00778 block +=line_size;\
00779 a= AV_RN64(pixels );\
00780 b= AV_RN64(pixels+1);\
00781 l0= (a&0x0303030303030303ULL)\
00782 + (b&0x0303030303030303ULL)\
00783 + 0x0101010101010101ULL;\
00784 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00785 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00786 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00787 pixels+=line_size;\
00788 block +=line_size;\
00789 }\
00790 }\
00791 \
00792 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
00793 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
00794 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
00795 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
00796 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
00797 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
00798 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
00799
/*
 * 64-bit averaging operator for the disabled PIXOP2 variant: stores in dst
 * the per-byte average, rounded up, of dst and val.  Both arguments are
 * evaluated more than once.
 */
#define op_avg(dst, val) \
    dst = (((dst) | (val)) - ((((dst) ^ (val)) & 0xFEFEFEFEFEFEFEFEULL) >> 1))
00801 #else // 64 bit variant
00802
00803 #define PIXOP2(OPNAME, OP) \
00804 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00805 int i;\
00806 for(i=0; i<h; i++){\
00807 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
00808 pixels+=line_size;\
00809 block +=line_size;\
00810 }\
00811 }\
00812 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00813 int i;\
00814 for(i=0; i<h; i++){\
00815 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00816 pixels+=line_size;\
00817 block +=line_size;\
00818 }\
00819 }\
00820 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00821 int i;\
00822 for(i=0; i<h; i++){\
00823 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00824 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
00825 pixels+=line_size;\
00826 block +=line_size;\
00827 }\
00828 }\
00829 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00830 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00831 }\
00832 \
00833 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00834 int src_stride1, int src_stride2, int h){\
00835 int i;\
00836 for(i=0; i<h; i++){\
00837 uint32_t a,b;\
00838 a= AV_RN32(&src1[i*src_stride1 ]);\
00839 b= AV_RN32(&src2[i*src_stride2 ]);\
00840 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00841 a= AV_RN32(&src1[i*src_stride1+4]);\
00842 b= AV_RN32(&src2[i*src_stride2+4]);\
00843 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00844 }\
00845 }\
00846 \
00847 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00848 int src_stride1, int src_stride2, int h){\
00849 int i;\
00850 for(i=0; i<h; i++){\
00851 uint32_t a,b;\
00852 a= AV_RN32(&src1[i*src_stride1 ]);\
00853 b= AV_RN32(&src2[i*src_stride2 ]);\
00854 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00855 a= AV_RN32(&src1[i*src_stride1+4]);\
00856 b= AV_RN32(&src2[i*src_stride2+4]);\
00857 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00858 }\
00859 }\
00860 \
00861 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00862 int src_stride1, int src_stride2, int h){\
00863 int i;\
00864 for(i=0; i<h; i++){\
00865 uint32_t a,b;\
00866 a= AV_RN32(&src1[i*src_stride1 ]);\
00867 b= AV_RN32(&src2[i*src_stride2 ]);\
00868 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00869 }\
00870 }\
00871 \
00872 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00873 int src_stride1, int src_stride2, int h){\
00874 int i;\
00875 for(i=0; i<h; i++){\
00876 uint32_t a,b;\
00877 a= AV_RN16(&src1[i*src_stride1 ]);\
00878 b= AV_RN16(&src2[i*src_stride2 ]);\
00879 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00880 }\
00881 }\
00882 \
00883 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00884 int src_stride1, int src_stride2, int h){\
00885 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00886 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00887 }\
00888 \
00889 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00890 int src_stride1, int src_stride2, int h){\
00891 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00892 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00893 }\
00894 \
00895 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00896 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00897 }\
00898 \
00899 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00900 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00901 }\
00902 \
00903 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00904 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00905 }\
00906 \
00907 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00908 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00909 }\
00910 \
00911 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
00912 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00913 int i;\
00914 for(i=0; i<h; i++){\
00915 uint32_t a, b, c, d, l0, l1, h0, h1;\
00916 a= AV_RN32(&src1[i*src_stride1]);\
00917 b= AV_RN32(&src2[i*src_stride2]);\
00918 c= AV_RN32(&src3[i*src_stride3]);\
00919 d= AV_RN32(&src4[i*src_stride4]);\
00920 l0= (a&0x03030303UL)\
00921 + (b&0x03030303UL)\
00922 + 0x02020202UL;\
00923 h0= ((a&0xFCFCFCFCUL)>>2)\
00924 + ((b&0xFCFCFCFCUL)>>2);\
00925 l1= (c&0x03030303UL)\
00926 + (d&0x03030303UL);\
00927 h1= ((c&0xFCFCFCFCUL)>>2)\
00928 + ((d&0xFCFCFCFCUL)>>2);\
00929 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00930 a= AV_RN32(&src1[i*src_stride1+4]);\
00931 b= AV_RN32(&src2[i*src_stride2+4]);\
00932 c= AV_RN32(&src3[i*src_stride3+4]);\
00933 d= AV_RN32(&src4[i*src_stride4+4]);\
00934 l0= (a&0x03030303UL)\
00935 + (b&0x03030303UL)\
00936 + 0x02020202UL;\
00937 h0= ((a&0xFCFCFCFCUL)>>2)\
00938 + ((b&0xFCFCFCFCUL)>>2);\
00939 l1= (c&0x03030303UL)\
00940 + (d&0x03030303UL);\
00941 h1= ((c&0xFCFCFCFCUL)>>2)\
00942 + ((d&0xFCFCFCFCUL)>>2);\
00943 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00944 }\
00945 }\
00946 \
00947 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00948 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00949 }\
00950 \
00951 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00952 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00953 }\
00954 \
00955 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00956 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00957 }\
00958 \
00959 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00960 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00961 }\
00962 \
00963 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
00964 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00965 int i;\
00966 for(i=0; i<h; i++){\
00967 uint32_t a, b, c, d, l0, l1, h0, h1;\
00968 a= AV_RN32(&src1[i*src_stride1]);\
00969 b= AV_RN32(&src2[i*src_stride2]);\
00970 c= AV_RN32(&src3[i*src_stride3]);\
00971 d= AV_RN32(&src4[i*src_stride4]);\
00972 l0= (a&0x03030303UL)\
00973 + (b&0x03030303UL)\
00974 + 0x01010101UL;\
00975 h0= ((a&0xFCFCFCFCUL)>>2)\
00976 + ((b&0xFCFCFCFCUL)>>2);\
00977 l1= (c&0x03030303UL)\
00978 + (d&0x03030303UL);\
00979 h1= ((c&0xFCFCFCFCUL)>>2)\
00980 + ((d&0xFCFCFCFCUL)>>2);\
00981 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00982 a= AV_RN32(&src1[i*src_stride1+4]);\
00983 b= AV_RN32(&src2[i*src_stride2+4]);\
00984 c= AV_RN32(&src3[i*src_stride3+4]);\
00985 d= AV_RN32(&src4[i*src_stride4+4]);\
00986 l0= (a&0x03030303UL)\
00987 + (b&0x03030303UL)\
00988 + 0x01010101UL;\
00989 h0= ((a&0xFCFCFCFCUL)>>2)\
00990 + ((b&0xFCFCFCFCUL)>>2);\
00991 l1= (c&0x03030303UL)\
00992 + (d&0x03030303UL);\
00993 h1= ((c&0xFCFCFCFCUL)>>2)\
00994 + ((d&0xFCFCFCFCUL)>>2);\
00995 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00996 }\
00997 }\
00998 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
00999 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01000 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01001 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01002 }\
01003 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
01004 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01005 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01006 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01007 }\
01008 \
01009 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01010 {\
01011 int i, a0, b0, a1, b1;\
01012 a0= pixels[0];\
01013 b0= pixels[1] + 2;\
01014 a0 += b0;\
01015 b0 += pixels[2];\
01016 \
01017 pixels+=line_size;\
01018 for(i=0; i<h; i+=2){\
01019 a1= pixels[0];\
01020 b1= pixels[1];\
01021 a1 += b1;\
01022 b1 += pixels[2];\
01023 \
01024 block[0]= (a1+a0)>>2; \
01025 block[1]= (b1+b0)>>2;\
01026 \
01027 pixels+=line_size;\
01028 block +=line_size;\
01029 \
01030 a0= pixels[0];\
01031 b0= pixels[1] + 2;\
01032 a0 += b0;\
01033 b0 += pixels[2];\
01034 \
01035 block[0]= (a1+a0)>>2;\
01036 block[1]= (b1+b0)>>2;\
01037 pixels+=line_size;\
01038 block +=line_size;\
01039 }\
01040 }\
01041 \
01042 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01043 {\
01044 int i;\
01045 const uint32_t a= AV_RN32(pixels );\
01046 const uint32_t b= AV_RN32(pixels+1);\
01047 uint32_t l0= (a&0x03030303UL)\
01048 + (b&0x03030303UL)\
01049 + 0x02020202UL;\
01050 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01051 + ((b&0xFCFCFCFCUL)>>2);\
01052 uint32_t l1,h1;\
01053 \
01054 pixels+=line_size;\
01055 for(i=0; i<h; i+=2){\
01056 uint32_t a= AV_RN32(pixels );\
01057 uint32_t b= AV_RN32(pixels+1);\
01058 l1= (a&0x03030303UL)\
01059 + (b&0x03030303UL);\
01060 h1= ((a&0xFCFCFCFCUL)>>2)\
01061 + ((b&0xFCFCFCFCUL)>>2);\
01062 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01063 pixels+=line_size;\
01064 block +=line_size;\
01065 a= AV_RN32(pixels );\
01066 b= AV_RN32(pixels+1);\
01067 l0= (a&0x03030303UL)\
01068 + (b&0x03030303UL)\
01069 + 0x02020202UL;\
01070 h0= ((a&0xFCFCFCFCUL)>>2)\
01071 + ((b&0xFCFCFCFCUL)>>2);\
01072 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01073 pixels+=line_size;\
01074 block +=line_size;\
01075 }\
01076 }\
01077 \
01078 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01079 {\
01080 int j;\
01081 for(j=0; j<2; j++){\
01082 int i;\
01083 const uint32_t a= AV_RN32(pixels );\
01084 const uint32_t b= AV_RN32(pixels+1);\
01085 uint32_t l0= (a&0x03030303UL)\
01086 + (b&0x03030303UL)\
01087 + 0x02020202UL;\
01088 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01089 + ((b&0xFCFCFCFCUL)>>2);\
01090 uint32_t l1,h1;\
01091 \
01092 pixels+=line_size;\
01093 for(i=0; i<h; i+=2){\
01094 uint32_t a= AV_RN32(pixels );\
01095 uint32_t b= AV_RN32(pixels+1);\
01096 l1= (a&0x03030303UL)\
01097 + (b&0x03030303UL);\
01098 h1= ((a&0xFCFCFCFCUL)>>2)\
01099 + ((b&0xFCFCFCFCUL)>>2);\
01100 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01101 pixels+=line_size;\
01102 block +=line_size;\
01103 a= AV_RN32(pixels );\
01104 b= AV_RN32(pixels+1);\
01105 l0= (a&0x03030303UL)\
01106 + (b&0x03030303UL)\
01107 + 0x02020202UL;\
01108 h0= ((a&0xFCFCFCFCUL)>>2)\
01109 + ((b&0xFCFCFCFCUL)>>2);\
01110 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01111 pixels+=line_size;\
01112 block +=line_size;\
01113 }\
01114 pixels+=4-line_size*(h+1);\
01115 block +=4-line_size*h;\
01116 }\
01117 }\
01118 \
01119 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01120 {\
01121 int j;\
01122 for(j=0; j<2; j++){\
01123 int i;\
01124 const uint32_t a= AV_RN32(pixels );\
01125 const uint32_t b= AV_RN32(pixels+1);\
01126 uint32_t l0= (a&0x03030303UL)\
01127 + (b&0x03030303UL)\
01128 + 0x01010101UL;\
01129 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01130 + ((b&0xFCFCFCFCUL)>>2);\
01131 uint32_t l1,h1;\
01132 \
01133 pixels+=line_size;\
01134 for(i=0; i<h; i+=2){\
01135 uint32_t a= AV_RN32(pixels );\
01136 uint32_t b= AV_RN32(pixels+1);\
01137 l1= (a&0x03030303UL)\
01138 + (b&0x03030303UL);\
01139 h1= ((a&0xFCFCFCFCUL)>>2)\
01140 + ((b&0xFCFCFCFCUL)>>2);\
01141 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01142 pixels+=line_size;\
01143 block +=line_size;\
01144 a= AV_RN32(pixels );\
01145 b= AV_RN32(pixels+1);\
01146 l0= (a&0x03030303UL)\
01147 + (b&0x03030303UL)\
01148 + 0x01010101UL;\
01149 h0= ((a&0xFCFCFCFCUL)>>2)\
01150 + ((b&0xFCFCFCFCUL)>>2);\
01151 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01152 pixels+=line_size;\
01153 block +=line_size;\
01154 }\
01155 pixels+=4-line_size*(h+1);\
01156 block +=4-line_size*h;\
01157 }\
01158 }\
01159 \
01160 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
01161 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
01162 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
01163 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
01164 av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
01165 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
01166 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
01167 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01168
01169 #define op_avg(a, b) a = rnd_avg32(a, b)
01170 #endif
/* Store operator handed to PIXOP2: writes b into a unchanged. */
01171 #define op_put(a, b) a = b
01172
/* Instantiate the PIXOP2 function family twice: once with the "avg" name
 * prefix and the averaging store, once with the "put" prefix and the plain
 * store. */
01173 PIXOP2(avg, op_avg)
01174 PIXOP2(put, op_put)
/* The store operators are only needed for the instantiations above; they are
 * redefined with different meanings further down in this file. */
01175 #undef op_avg
01176 #undef op_put
01177
/* From here on the full-pel "no_rnd" put names resolve to the regular put
 * routines.
 * NOTE(review): presumably because a plain copy involves no rounding - confirm. */
01178 #define put_no_rnd_pixels8_c put_pixels8_c
01179 #define put_no_rnd_pixels16_c put_pixels16_c
01180
/* Rounded integer averages of two / four values.
 * Every argument is parenthesized so that expressions containing operators of
 * lower precedence than '+' (e.g. a & b, a >> b, c ? x : y) are averaged as a
 * whole instead of being torn apart by the expansion.
 * Arguments are still evaluated exactly once each. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
01183
/**
 * Single-stride adapter around put_no_rnd_pixels16_l2().
 *
 * The same line stride is supplied for all three stride arguments of the
 * underlying routine.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride line stride used for dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b,
                                     int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, a, b,
                           stride, stride, stride,
                           h);
}
01187
/**
 * Single-stride adapter around put_no_rnd_pixels8_l2().
 *
 * The same line stride is supplied for all three stride arguments of the
 * underlying routine.
 *
 * @param dst    destination block
 * @param a      first source block
 * @param b      second source block
 * @param stride line stride used for dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b,
                                    int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, a, b,
                          stride, stride, stride,
                          h);
}
01191
/**
 * Bilinear interpolation of an 8-pixel-wide block at a fixed sub-pel offset.
 *
 * Each output pixel blends the 2x2 source neighbourhood starting at the same
 * position. The four weights are derived from x16/y16 (in 1/16 units) and
 * always sum to 256, hence the final shift by 8.
 *
 * @param dst     destination, 8 pixels written per row
 * @param src     source; columns 0..8 of rows 0..h are read
 * @param stride  line stride shared by dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbour
 * @param y16     vertical weight of the lower neighbour
 * @param rounder constant added before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16); /* top-left     */
    const int w_tr = x16        * (16 - y16); /* top-right    */
    const int w_bl = (16 - x16) * y16;        /* bottom-left  */
    const int w_br = x16        * y16;        /* bottom-right */
    int row, col;

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
01214
/**
 * Warp an 8-pixel-wide block through an affine motion field with bilinear
 * interpolation.
 *
 * For every output pixel a source coordinate (vx, vy) is tracked. Its upper
 * bits (after dropping the low 16) hold the integer position followed by
 * `shift` bits of sub-pel fraction. The coordinate advances by (dxx, dyx) per
 * output column and the row origin advances by (dxy, dyy) per output row.
 *
 * Positions outside the picture are clamped. When only one axis is out of
 * range the interpolation is performed along the other axis alone; when both
 * are out of range the clamped sample is copied unchanged.
 *
 * @param dst    destination, 8 pixels written per row
 * @param src    source picture
 * @param stride line stride shared by dst and src
 * @param h      number of rows to produce
 * @param ox,oy  source coordinate of the first output pixel
 * @param dxx,dyx coordinate step per output column
 * @param dxy,dyy coordinate step per output row
 * @param shift  number of sub-pel fraction bits
 * @param r      constant added before the final shift by 2*shift
 * @param width  source width; the last usable column index is width-1
 * @param height source height; the last usable row index is height-1
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int one       = 1 << shift;   /* 1.0 in sub-pel units */
    const int frac_mask = one - 1;
    const int last_x    = width  - 1;
    const int last_y    = height - 1;
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;

        for (col = 0; col < 8; col++) {
            int sx = vx >> 16;
            int sy = vy >> 16;
            const int fx = sx & frac_mask;
            const int fy = sy & frac_mask;
            int x_inside, y_inside, pos;

            sx >>= shift;
            sy >>= shift;

            /* The unsigned compare rejects negative coordinates as well. */
            x_inside = (unsigned)sx < (unsigned)last_x;
            y_inside = (unsigned)sy < (unsigned)last_y;

            if (x_inside && y_inside) {
                /* full 2x2 bilinear blend */
                pos = sx + sy * stride;
                out[col] = (  (  src[pos             ] * (one - fx)
                               + src[pos          + 1] *        fx ) * (one - fy)
                            + (  src[pos + stride    ] * (one - fx)
                               + src[pos + stride + 1] *        fx ) *        fy
                            + r) >> (shift * 2);
            } else if (x_inside) {
                /* row clamped: horizontal interpolation only */
                pos = sx + av_clip(sy, 0, last_y) * stride;
                out[col] = (  (  src[pos    ] * (one - fx)
                               + src[pos + 1] *        fx ) * one
                            + r) >> (shift * 2);
            } else if (y_inside) {
                /* column clamped: vertical interpolation only */
                pos = av_clip(sx, 0, last_x) + sy * stride;
                out[col] = (  (  src[pos         ] * (one - fy)
                               + src[pos + stride] *        fy ) * one
                            + r) >> (shift * 2);
            } else {
                /* both clamped: copy the nearest edge sample */
                pos = av_clip(sx, 0, last_x) + av_clip(sy, 0, last_y) * stride;
                out[col] = src[pos];
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
01272
/**
 * Third-pel "put" at offset (0,0): forward to the put_pixelsN_c routine that
 * matches the block width.
 *
 * Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
01281
/**
 * Third-pel "put": blend each pixel with its right neighbour, weights 2:1.
 *
 * The weighted sum (plus 1 for rounding) is divided by 3 via the fixed-point
 * reciprocal 683/2048.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x] + src[x + 1] + 1;
            dst[x] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
01292
/**
 * Third-pel "put": blend each pixel with its right neighbour, weights 1:2.
 *
 * The weighted sum (plus 1 for rounding) is divided by 3 via the fixed-point
 * reciprocal 683/2048.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2 * src[x + 1] + 1;
            dst[x] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
01303
/**
 * Third-pel "put": blend each pixel with the one below it, weights 2:1.
 *
 * The weighted sum (plus 1 for rounding) is divided by 3 via the fixed-point
 * reciprocal 683/2048.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x] + below[x] + 1;
            dst[x] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
01314
/**
 * Third-pel "put": blend the 2x2 neighbourhood with weights
 *   4 3
 *   3 2
 * The weighted sum (plus 6 for rounding) is divided by 12 via the fixed-point
 * reciprocal 2731/32768.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 4 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 2 * below[x + 1] + 6;
            dst[x] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
01325
/**
 * Third-pel "put": blend the 2x2 neighbourhood with weights
 *   3 2
 *   4 3
 * The weighted sum (plus 6 for rounding) is divided by 12 via the fixed-point
 * reciprocal 2731/32768.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * src[x]   + 2 * src[x + 1] +
                            4 * below[x] + 3 * below[x + 1] + 6;
            dst[x] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
01336
/**
 * Third-pel "put": blend each pixel with the one below it, weights 1:2.
 *
 * The weighted sum (plus 1 for rounding) is divided by 3 via the fixed-point
 * reciprocal 683/2048.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2 * below[x] + 1;
            dst[x] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
01347
/**
 * Third-pel "put": blend the 2x2 neighbourhood with weights
 *   3 4
 *   2 3
 * The weighted sum (plus 6 for rounding) is divided by 12 via the fixed-point
 * reciprocal 2731/32768.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * src[x]   + 4 * src[x + 1] +
                            2 * below[x] + 3 * below[x + 1] + 6;
            dst[x] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
01358
/**
 * Third-pel "put": blend the 2x2 neighbourhood with weights
 *   2 3
 *   3 4
 * The weighted sum (plus 6 for rounding) is divided by 12 via the fixed-point
 * reciprocal 2731/32768.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 4 * below[x + 1] + 6;
            dst[x] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
01369
/**
 * Third-pel "avg" at offset (0,0): forward to the avg_pixelsN_c routine that
 * matches the block width.
 *
 * Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
01378
/**
 * Third-pel "avg": interpolate each pixel with its right neighbour (weights
 * 2:1, divided by 3 via 683/2048), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int pred = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
01389
/**
 * Third-pel "avg": interpolate each pixel with its right neighbour (weights
 * 1:2, divided by 3 via 683/2048), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int pred = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
01400
/**
 * Third-pel "avg": interpolate each pixel with the one below it (weights 2:1,
 * divided by 3 via 683/2048), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = (683 * (2 * src[x] + below[x] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
01411
/**
 * Third-pel "avg": interpolate the 2x2 neighbourhood with weights
 *   4 3
 *   3 2
 * (divided by 12 via 2731/32768), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 4 * src[x]   + 3 * src[x + 1] +
                             3 * below[x] + 2 * below[x + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
01422
/**
 * Third-pel "avg": interpolate the 2x2 neighbourhood with weights
 *   3 2
 *   4 3
 * (divided by 12 via 2731/32768), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * src[x]   + 2 * src[x + 1] +
                             4 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
01433
/**
 * Third-pel "avg": interpolate each pixel with the one below it (weights 1:2,
 * divided by 3 via 683/2048), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = (683 * (src[x] + 2 * below[x] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
01444
/**
 * Third-pel "avg": interpolate the 2x2 neighbourhood with weights
 *   3 4
 *   2 3
 * (divided by 12 via 2731/32768), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * src[x]   + 4 * src[x + 1] +
                             2 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
01455
/**
 * Third-pel "avg": interpolate the 2x2 neighbourhood with weights
 *   2 3
 *   3 4
 * (divided by 12 via 2731/32768), then take the rounded average of that
 * prediction and the value already in dst.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 2 * src[x]   + 3 * src[x + 1] +
                             3 * below[x] + 4 * below[x + 1] + 6;
            const int pred = (2731 * sum) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
#if 0
/*
 * Disabled generator: TPEL_WIDTH(width) would emit fixed-width wrappers
 * put_tpel_pixels<width>_mcXY_c() around the generic put_tpel_pixels_mcXY_c()
 * routines, passing `width` as a constant.
 *
 * The wrappers call the generic routine as a plain statement; the previous
 * text had a stray `void` in front of every call, which would not compile
 * once this section is enabled.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
01487
/**
 * Generate one H.264 chroma motion-compensation routine for blocks W pixels
 * wide, named <OPNAME>h264_chroma_mc<W>_c.
 *
 * x and y are the sub-pel offsets in 1/8 units (asserted to lie in 0..7).
 * The four bilinear weights A..D always sum to 64; OP receives the raw
 * weighted sum and is responsible for rounding, scaling and storing.
 *
 * When D is zero at least one of x, y is zero, so at most one of B and C is
 * non-zero. The filter then degenerates to two taps: the current sample and
 * either the sample below (C != 0) or the sample to the right.
 */
#define H264_CHROMA_MC_FUNC(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int row, col;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(row=0; row<h; row++){\
            for(col=0; col<W; col++)\
                OP(dst[col], (A*src[col] + B*src[col+1] + C*src[stride+col] + D*src[stride+col+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(row=0; row<h; row++){\
            for(col=0; col<W; col++)\
                OP(dst[col], (A*src[col] + E*src[step+col]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/** Generate the 2-, 4- and 8-pixel-wide chroma MC routines for one OP. */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 2)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 4)\
H264_CHROMA_MC_FUNC(OPNAME, OP, 8)

/* b is a weighted sum with total weight 64: round and scale it back, then
 * either store it (put) or take the rounded average with the old value (avg). */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
01596
/**
 * 8-pixel-wide bilinear chroma interpolation with a reduced rounding bias.
 *
 * x and y are the sub-pel offsets in 1/8 units (asserted to lie in 0..7).
 * The four weights sum to 64; the bias added before the shift by 6 is
 * 32 - 4 = 28 rather than 32.
 *
 * @param dst    destination, 8 pixels written per row
 * @param src    source; columns 0..8 of rows 0..h are read
 * @param stride line stride shared by dst and src
 * @param h      number of rows to produce
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = x       * (8 - y);
    const int C = (8 - x) * y;
    const int D = x       * y;
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
01620
/**
 * Averaging variant of the 8-pixel-wide reduced-bias bilinear chroma
 * interpolation.
 *
 * The prediction is computed exactly as in the "put" variant (weights summing
 * to 64, bias 32 - 4, shift by 6) and then combined with the value already in
 * dst via avg2().
 *
 * @param dst    destination, 8 pixels updated per row
 * @param src    source; columns 0..8 of rows 0..h are read
 * @param stride line stride shared by dst and src
 * @param h      number of rows to produce
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = x       * (8 - y);
    const int C = (8 - x) * y;
    const int D = x       * y;
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            const int pred = (A * src[col]          + B * src[col + 1] +
                              C * src[stride + col] + D * src[stride + col + 1] + 32 - 4) >> 6;
            dst[col] = avg2(dst[col], pred);
        }
        dst += stride;
        src += stride;
    }
}
01644
01645 #define QPEL_MC(r, OPNAME, RND, OP) \
01646 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01647 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01648 int i;\
01649 for(i=0; i<h; i++)\
01650 {\
01651 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01652 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01653 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01654 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01655 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01656 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01657 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01658 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01659 dst+=dstStride;\
01660 src+=srcStride;\
01661 }\
01662 }\
01663 \
01664 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01665 const int w=8;\
01666 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01667 int i;\
01668 for(i=0; i<w; i++)\
01669 {\
01670 const int src0= src[0*srcStride];\
01671 const int src1= src[1*srcStride];\
01672 const int src2= src[2*srcStride];\
01673 const int src3= src[3*srcStride];\
01674 const int src4= src[4*srcStride];\
01675 const int src5= src[5*srcStride];\
01676 const int src6= src[6*srcStride];\
01677 const int src7= src[7*srcStride];\
01678 const int src8= src[8*srcStride];\
01679 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01680 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01681 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01682 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01683 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01684 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01685 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01686 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01687 dst++;\
01688 src++;\
01689 }\
01690 }\
01691 \
01692 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01693 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01694 int i;\
01695 \
01696 for(i=0; i<h; i++)\
01697 {\
01698 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01699 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01700 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01701 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01702 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01703 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01704 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01705 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01706 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01707 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01708 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01709 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01710 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01711 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01712 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01713 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01714 dst+=dstStride;\
01715 src+=srcStride;\
01716 }\
01717 }\
01718 \
01719 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01720 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01721 int i;\
01722 const int w=16;\
01723 for(i=0; i<w; i++)\
01724 {\
01725 const int src0= src[0*srcStride];\
01726 const int src1= src[1*srcStride];\
01727 const int src2= src[2*srcStride];\
01728 const int src3= src[3*srcStride];\
01729 const int src4= src[4*srcStride];\
01730 const int src5= src[5*srcStride];\
01731 const int src6= src[6*srcStride];\
01732 const int src7= src[7*srcStride];\
01733 const int src8= src[8*srcStride];\
01734 const int src9= src[9*srcStride];\
01735 const int src10= src[10*srcStride];\
01736 const int src11= src[11*srcStride];\
01737 const int src12= src[12*srcStride];\
01738 const int src13= src[13*srcStride];\
01739 const int src14= src[14*srcStride];\
01740 const int src15= src[15*srcStride];\
01741 const int src16= src[16*srcStride];\
01742 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01743 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01744 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01745 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01746 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01747 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01748 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01749 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01750 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01751 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01752 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01753 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01754 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01755 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01756 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01757 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01758 dst++;\
01759 src++;\
01760 }\
01761 }\
01762 \
01763 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01764 uint8_t half[64];\
01765 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01766 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01767 }\
01768 \
01769 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01770 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01771 }\
01772 \
01773 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01774 uint8_t half[64];\
01775 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01776 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01777 }\
01778 \
01779 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01780 uint8_t full[16*9];\
01781 uint8_t half[64];\
01782 copy_block9(full, src, 16, stride, 9);\
01783 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01784 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01785 }\
01786 \
01787 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01788 uint8_t full[16*9];\
01789 copy_block9(full, src, 16, stride, 9);\
01790 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01791 }\
01792 \
01793 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01794 uint8_t full[16*9];\
01795 uint8_t half[64];\
01796 copy_block9(full, src, 16, stride, 9);\
01797 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01798 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01799 }\
01800 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01801 uint8_t full[16*9];\
01802 uint8_t halfH[72];\
01803 uint8_t halfV[64];\
01804 uint8_t halfHV[64];\
01805 copy_block9(full, src, 16, stride, 9);\
01806 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01807 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01808 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01809 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01810 }\
01811 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01812 uint8_t full[16*9];\
01813 uint8_t halfH[72];\
01814 uint8_t halfHV[64];\
01815 copy_block9(full, src, 16, stride, 9);\
01816 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01817 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01818 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01819 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01820 }\
01821 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01822 uint8_t full[16*9];\
01823 uint8_t halfH[72];\
01824 uint8_t halfV[64];\
01825 uint8_t halfHV[64];\
01826 copy_block9(full, src, 16, stride, 9);\
01827 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01828 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01829 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01830 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01831 }\
01832 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01833 uint8_t full[16*9];\
01834 uint8_t halfH[72];\
01835 uint8_t halfHV[64];\
01836 copy_block9(full, src, 16, stride, 9);\
01837 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01838 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01839 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01840 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01841 }\
01842 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01843 uint8_t full[16*9];\
01844 uint8_t halfH[72];\
01845 uint8_t halfV[64];\
01846 uint8_t halfHV[64];\
01847 copy_block9(full, src, 16, stride, 9);\
01848 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01849 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01850 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01851 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01852 }\
01853 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01854 uint8_t full[16*9];\
01855 uint8_t halfH[72];\
01856 uint8_t halfHV[64];\
01857 copy_block9(full, src, 16, stride, 9);\
01858 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01859 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01860 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01861 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01862 }\
01863 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01864 uint8_t full[16*9];\
01865 uint8_t halfH[72];\
01866 uint8_t halfV[64];\
01867 uint8_t halfHV[64];\
01868 copy_block9(full, src, 16, stride, 9);\
01869 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01870 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01871 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01872 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01873 }\
01874 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01875 uint8_t full[16*9];\
01876 uint8_t halfH[72];\
01877 uint8_t halfHV[64];\
01878 copy_block9(full, src, 16, stride, 9);\
01879 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01880 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01881 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01882 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01883 }\
01884 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01885 uint8_t halfH[72];\
01886 uint8_t halfHV[64];\
01887 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01888 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01889 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01890 }\
01891 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01892 uint8_t halfH[72];\
01893 uint8_t halfHV[64];\
01894 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01895 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01896 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01897 }\
01898 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01899 uint8_t full[16*9];\
01900 uint8_t halfH[72];\
01901 uint8_t halfV[64];\
01902 uint8_t halfHV[64];\
01903 copy_block9(full, src, 16, stride, 9);\
01904 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01905 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01906 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01907 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01908 }\
01909 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01910 uint8_t full[16*9];\
01911 uint8_t halfH[72];\
01912 copy_block9(full, src, 16, stride, 9);\
01913 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01914 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01915 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01916 }\
01917 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01918 uint8_t full[16*9];\
01919 uint8_t halfH[72];\
01920 uint8_t halfV[64];\
01921 uint8_t halfHV[64];\
01922 copy_block9(full, src, 16, stride, 9);\
01923 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01924 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01925 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01926 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01927 }\
01928 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01929 uint8_t full[16*9];\
01930 uint8_t halfH[72];\
01931 copy_block9(full, src, 16, stride, 9);\
01932 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01933 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01934 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01935 }\
01936 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01937 uint8_t halfH[72];\
01938 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01939 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01940 }\
01941 \
01942 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01943 uint8_t half[256];\
01944 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01945 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
01946 }\
01947 \
01948 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01949 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01950 }\
01951 \
01952 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01953 uint8_t half[256];\
01954 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01955 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
01956 }\
01957 \
01958 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01959 uint8_t full[24*17];\
01960 uint8_t half[256];\
01961 copy_block17(full, src, 24, stride, 17);\
01962 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01963 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
01964 }\
01965 \
01966 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01967 uint8_t full[24*17];\
01968 copy_block17(full, src, 24, stride, 17);\
01969 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01970 }\
01971 \
01972 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01973 uint8_t full[24*17];\
01974 uint8_t half[256];\
01975 copy_block17(full, src, 24, stride, 17);\
01976 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01977 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
01978 }\
01979 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01980 uint8_t full[24*17];\
01981 uint8_t halfH[272];\
01982 uint8_t halfV[256];\
01983 uint8_t halfHV[256];\
01984 copy_block17(full, src, 24, stride, 17);\
01985 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01986 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01987 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01988 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01989 }\
01990 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01991 uint8_t full[24*17];\
01992 uint8_t halfH[272];\
01993 uint8_t halfHV[256];\
01994 copy_block17(full, src, 24, stride, 17);\
01995 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01996 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01997 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01998 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01999 }\
02000 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
02001 uint8_t full[24*17];\
02002 uint8_t halfH[272];\
02003 uint8_t halfV[256];\
02004 uint8_t halfHV[256];\
02005 copy_block17(full, src, 24, stride, 17);\
02006 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02007 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02008 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02009 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02010 }\
02011 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
02012 uint8_t full[24*17];\
02013 uint8_t halfH[272];\
02014 uint8_t halfHV[256];\
02015 copy_block17(full, src, 24, stride, 17);\
02016 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02017 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02018 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02019 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02020 }\
02021 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
02022 uint8_t full[24*17];\
02023 uint8_t halfH[272];\
02024 uint8_t halfV[256];\
02025 uint8_t halfHV[256];\
02026 copy_block17(full, src, 24, stride, 17);\
02027 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02028 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02029 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02030 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02031 }\
02032 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02033 uint8_t full[24*17];\
02034 uint8_t halfH[272];\
02035 uint8_t halfHV[256];\
02036 copy_block17(full, src, 24, stride, 17);\
02037 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02038 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02039 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02040 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02041 }\
02042 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
02043 uint8_t full[24*17];\
02044 uint8_t halfH[272];\
02045 uint8_t halfV[256];\
02046 uint8_t halfHV[256];\
02047 copy_block17(full, src, 24, stride, 17);\
02048 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
02049 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02050 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02051 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02052 }\
02053 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02054 uint8_t full[24*17];\
02055 uint8_t halfH[272];\
02056 uint8_t halfHV[256];\
02057 copy_block17(full, src, 24, stride, 17);\
02058 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02059 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02060 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02061 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02062 }\
02063 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02064 uint8_t halfH[272];\
02065 uint8_t halfHV[256];\
02066 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02067 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02068 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02069 }\
02070 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02071 uint8_t halfH[272];\
02072 uint8_t halfHV[256];\
02073 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02074 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02075 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02076 }\
02077 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
02078 uint8_t full[24*17];\
02079 uint8_t halfH[272];\
02080 uint8_t halfV[256];\
02081 uint8_t halfHV[256];\
02082 copy_block17(full, src, 24, stride, 17);\
02083 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02084 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02085 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02086 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02087 }\
02088 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02089 uint8_t full[24*17];\
02090 uint8_t halfH[272];\
02091 copy_block17(full, src, 24, stride, 17);\
02092 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02093 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02094 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02095 }\
02096 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
02097 uint8_t full[24*17];\
02098 uint8_t halfH[272];\
02099 uint8_t halfV[256];\
02100 uint8_t halfHV[256];\
02101 copy_block17(full, src, 24, stride, 17);\
02102 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02103 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02104 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02105 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02106 }\
02107 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02108 uint8_t full[24*17];\
02109 uint8_t halfH[272];\
02110 copy_block17(full, src, 24, stride, 17);\
02111 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02112 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02113 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02114 }\
02115 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02116 uint8_t halfH[272];\
02117 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02118 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02119 }
02120
/*
 * Store operators handed to QPEL_MC as its last argument.
 *
 * In each of them `a` is the destination lvalue and `b` is a filter sum that
 * still has to be divided by 32 (the ">>5").  The result indexes `cm`, which
 * is not defined here and therefore must be in scope wherever the operator is
 * expanded (presumably the clamping table derived from ff_cropTbl -- confirm
 * against the QPEL_MC definition).
 *
 *   op_put         overwrite a with the rounded result          (+16 before >>5)
 *   op_put_no_rnd  same, but with a bias of 15 instead of 16
 *   op_avg         average the rounded result with the old a, rounding up (+1)>>1
 *   op_avg_no_rnd  average without the +1, using the 15 bias
 */
02121 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02122 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
02123 #define op_put(a, b) a = cm[((b) + 16)>>5]
02124 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
02125
/*
 * Instantiate the qpel function families: put_, put_no_rnd_ and avg_.
 * The third argument is pasted into the names of the put-style helpers the
 * generated code calls ("_" or "_no_rnd_").  The leading 0/1 argument
 * presumably selects the rounding mode -- confirm against QPEL_MC.
 * Note that op_avg_no_rnd is defined above but no avg_no_rnd_ family is
 * instantiated here.
 */
02126 QPEL_MC(0, put_ , _ , op_put)
02127 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
02128 QPEL_MC(0, avg_ , _ , op_avg)
02129
/* The store operators are only meant for the instantiations above. */
02130 #undef op_avg
02131 #undef op_avg_no_rnd
02132 #undef op_put
02133 #undef op_put_no_rnd
02134
/*
 * The mc00 entries of the qpel families are plain aliases of the 8x8 / 16x16
 * pixel put/avg functions (presumably because mc00 denotes the full-pel
 * position, which needs no filtering -- confirm against the users of these
 * names).  The put_no_rnd_ aliases resolve to the same functions as the
 * rounded put_ ones.
 */
02135 #define put_qpel8_mc00_c ff_put_pixels8x8_c
02136 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
02137 #define put_qpel16_mc00_c ff_put_pixels16x16_c
02138 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
02139 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
02140 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
02141
02142 #if 1
/*
 * H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Template that generates the <OPNAME>h264_qpel{2,4,8,16}_{h,v,hv}_lowpass
 * helper functions.
 *
 * Macro parameters:
 *   OPNAME  prefix pasted in front of every generated function name.
 *   OP      store operator used as OP(dst_lvalue, sum) by the single-pass
 *           (h and v) filters.  `sum` is the raw result of the 6-tap kernel
 *           (1, -5, 20, 20, -5, 1); the taps add up to 32 and no
 *           normalisation is done here, so OP has to scale and clamp.
 *   OP2     store operator used by the two-pass (hv) filters.  It receives the
 *           kernel applied twice, i.e. a value with a gain of 32*32 = 1024.
 * Each generated function declares `cm` (ff_cropTbl + MAX_NEG_CROP) as a local
 * so that the OP/OP2 expansions can refer to it.
 *
 * Generated functions, for N = 2, 4, 8:
 *   qpelN_h_lowpass(dst, src, dstStride, srcStride)
 *       Filters N rows horizontally, N outputs per row.  Output x is built
 *       from src[x-2] .. src[x+3], so each row is read from src[-2] up to
 *       src[N+2].
 *   qpelN_v_lowpass(dst, src, dstStride, srcStride)
 *       Filters N columns vertically, N outputs per column.  Each column is
 *       read from row -2 up to row N+2 relative to src.
 *   qpelN_hv_lowpass(dst, tmp, src, dstStride, tmpStride, srcStride)
 *       Pass 1 moves src up by two rows and writes the raw horizontal sums of
 *       N+5 consecutive rows into tmp (N int16_t values per row, rows
 *       tmpStride apart).  tmp is then rewound by N+5-2 rows so that it points
 *       at its third row, the one that corresponds to the original src row.
 *       Pass 2 runs the same kernel down each of the N columns of tmp and
 *       stores through OP2.  tmp must therefore provide N+5 rows.
 *
 * The 16x16 variants are composed of four 8x8 calls: left and right halves
 * (offset +8), then the same again after advancing src and dst by 8 rows.
 * qpel16_hv_lowpass passes tmp and tmp+8 to the 8x8 calls and reuses the same
 * scratch area for the lower half, so a tmp row has to hold at least 16
 * values.
 *
 * The qpel2 variants are tagged av_unused because not every instantiation of
 * this template ends up referencing them.
 */
02143 #define H264_LOWPASS(OPNAME, OP, OP2) \
02144 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02145 const int h=2;\
02146 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02147 int i;\
02148 for(i=0; i<h; i++)\
02149 {\
02150 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02151 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02152 dst+=dstStride;\
02153 src+=srcStride;\
02154 }\
02155 }\
02156 \
02157 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02158 const int w=2;\
02159 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02160 int i;\
02161 for(i=0; i<w; i++)\
02162 {\
02163 const int srcB= src[-2*srcStride];\
02164 const int srcA= src[-1*srcStride];\
02165 const int src0= src[0 *srcStride];\
02166 const int src1= src[1 *srcStride];\
02167 const int src2= src[2 *srcStride];\
02168 const int src3= src[3 *srcStride];\
02169 const int src4= src[4 *srcStride];\
02170 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02171 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02172 dst++;\
02173 src++;\
02174 }\
02175 }\
02176 \
02177 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02178 const int h=2;\
02179 const int w=2;\
02180 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02181 int i;\
02182 src -= 2*srcStride;\
02183 for(i=0; i<h+5; i++)\
02184 {\
02185 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02186 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02187 tmp+=tmpStride;\
02188 src+=srcStride;\
02189 }\
02190 tmp -= tmpStride*(h+5-2);\
02191 for(i=0; i<w; i++)\
02192 {\
02193 const int tmpB= tmp[-2*tmpStride];\
02194 const int tmpA= tmp[-1*tmpStride];\
02195 const int tmp0= tmp[0 *tmpStride];\
02196 const int tmp1= tmp[1 *tmpStride];\
02197 const int tmp2= tmp[2 *tmpStride];\
02198 const int tmp3= tmp[3 *tmpStride];\
02199 const int tmp4= tmp[4 *tmpStride];\
02200 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02201 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02202 dst++;\
02203 tmp++;\
02204 }\
02205 }\
02206 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02207 const int h=4;\
02208 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02209 int i;\
02210 for(i=0; i<h; i++)\
02211 {\
02212 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
02213 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
02214 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
02215 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
02216 dst+=dstStride;\
02217 src+=srcStride;\
02218 }\
02219 }\
02220 \
02221 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02222 const int w=4;\
02223 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02224 int i;\
02225 for(i=0; i<w; i++)\
02226 {\
02227 const int srcB= src[-2*srcStride];\
02228 const int srcA= src[-1*srcStride];\
02229 const int src0= src[0 *srcStride];\
02230 const int src1= src[1 *srcStride];\
02231 const int src2= src[2 *srcStride];\
02232 const int src3= src[3 *srcStride];\
02233 const int src4= src[4 *srcStride];\
02234 const int src5= src[5 *srcStride];\
02235 const int src6= src[6 *srcStride];\
02236 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02237 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02238 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02239 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02240 dst++;\
02241 src++;\
02242 }\
02243 }\
02244 \
02245 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02246 const int h=4;\
02247 const int w=4;\
02248 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02249 int i;\
02250 src -= 2*srcStride;\
02251 for(i=0; i<h+5; i++)\
02252 {\
02253 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
02254 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
02255 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
02256 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
02257 tmp+=tmpStride;\
02258 src+=srcStride;\
02259 }\
02260 tmp -= tmpStride*(h+5-2);\
02261 for(i=0; i<w; i++)\
02262 {\
02263 const int tmpB= tmp[-2*tmpStride];\
02264 const int tmpA= tmp[-1*tmpStride];\
02265 const int tmp0= tmp[0 *tmpStride];\
02266 const int tmp1= tmp[1 *tmpStride];\
02267 const int tmp2= tmp[2 *tmpStride];\
02268 const int tmp3= tmp[3 *tmpStride];\
02269 const int tmp4= tmp[4 *tmpStride];\
02270 const int tmp5= tmp[5 *tmpStride];\
02271 const int tmp6= tmp[6 *tmpStride];\
02272 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02273 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02274 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02275 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02276 dst++;\
02277 tmp++;\
02278 }\
02279 }\
02280 \
02281 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02282 const int h=8;\
02283 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02284 int i;\
02285 for(i=0; i<h; i++)\
02286 {\
02287 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
02288 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
02289 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
02290 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
02291 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
02292 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
02293 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
02294 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
02295 dst+=dstStride;\
02296 src+=srcStride;\
02297 }\
02298 }\
02299 \
02300 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02301 const int w=8;\
02302 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02303 int i;\
02304 for(i=0; i<w; i++)\
02305 {\
02306 const int srcB= src[-2*srcStride];\
02307 const int srcA= src[-1*srcStride];\
02308 const int src0= src[0 *srcStride];\
02309 const int src1= src[1 *srcStride];\
02310 const int src2= src[2 *srcStride];\
02311 const int src3= src[3 *srcStride];\
02312 const int src4= src[4 *srcStride];\
02313 const int src5= src[5 *srcStride];\
02314 const int src6= src[6 *srcStride];\
02315 const int src7= src[7 *srcStride];\
02316 const int src8= src[8 *srcStride];\
02317 const int src9= src[9 *srcStride];\
02318 const int src10=src[10*srcStride];\
02319 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
02320 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
02321 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
02322 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
02323 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
02324 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
02325 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
02326 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
02327 dst++;\
02328 src++;\
02329 }\
02330 }\
02331 \
02332 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02333 const int h=8;\
02334 const int w=8;\
02335 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
02336 int i;\
02337 src -= 2*srcStride;\
02338 for(i=0; i<h+5; i++)\
02339 {\
02340 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
02341 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
02342 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
02343 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
02344 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
02345 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
02346 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
02347 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
02348 tmp+=tmpStride;\
02349 src+=srcStride;\
02350 }\
02351 tmp -= tmpStride*(h+5-2);\
02352 for(i=0; i<w; i++)\
02353 {\
02354 const int tmpB= tmp[-2*tmpStride];\
02355 const int tmpA= tmp[-1*tmpStride];\
02356 const int tmp0= tmp[0 *tmpStride];\
02357 const int tmp1= tmp[1 *tmpStride];\
02358 const int tmp2= tmp[2 *tmpStride];\
02359 const int tmp3= tmp[3 *tmpStride];\
02360 const int tmp4= tmp[4 *tmpStride];\
02361 const int tmp5= tmp[5 *tmpStride];\
02362 const int tmp6= tmp[6 *tmpStride];\
02363 const int tmp7= tmp[7 *tmpStride];\
02364 const int tmp8= tmp[8 *tmpStride];\
02365 const int tmp9= tmp[9 *tmpStride];\
02366 const int tmp10=tmp[10*tmpStride];\
02367 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
02368 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
02369 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
02370 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
02371 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
02372 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
02373 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
02374 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
02375 dst++;\
02376 tmp++;\
02377 }\
02378 }\
02379 \
02380 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02381 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02382 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02383 src += 8*srcStride;\
02384 dst += 8*dstStride;\
02385 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
02386 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
02387 }\
02388 \
02389 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
02390 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02391 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02392 src += 8*srcStride;\
02393 dst += 8*dstStride;\
02394 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
02395 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
02396 }\
02397 \
02398 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
02399 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02400 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02401 src += 8*srcStride;\
02402 dst += 8*dstStride;\
02403 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
02404 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
02405 }\
02406
/*
 * H264_MC(OPNAME, SIZE)
 *
 * Template that generates the sixteen functions
 *     <OPNAME>h264_qpel<SIZE>_mcXY_c(uint8_t *dst, uint8_t *src, int stride)
 * for XY in {00,10,20,30, 01,02,03, 11,31,13,33, 22,21,23, 12,32}.
 * (X and Y presumably are the horizontal and vertical quarter-sample offsets;
 * the bodies below are consistent with that reading -- confirm against the
 * table that registers these functions.)
 *
 * Macro parameters:
 *   OPNAME  prefix of the generated functions and of the final store/combine
 *           helper they call.  Intermediate planes are always produced with
 *           the put_ lowpass helpers; only the last step uses OPNAME.
 *   SIZE    block width and height, pasted into helper names and used to size
 *           the on-stack scratch buffers.
 *
 * What each variant does:
 *   mc00        <OPNAME>pixels<SIZE>_c(dst, src, stride, SIZE); tagged
 *               av_unused.
 *   mc20/02/22  write the h / v / hv lowpass result straight to dst.
 *   mc10, mc30  combine the h lowpass plane with src (mc10) or src+1 (mc30).
 *   mc01, mc03  combine the v lowpass plane with the source rows at full_mid
 *               (mc01) or one row further down, full_mid+SIZE (mc03).
 *   mc11/31/13/33
 *               combine an h lowpass plane, taken at src (x1) or at
 *               src+stride (x3), with a v lowpass plane taken from the column
 *               at src (1x) or at src+1 (3x).
 *   mc21, mc23  combine the h lowpass plane at src (mc21) or src+stride (mc23)
 *               with the hv plane.
 *   mc12, mc32  combine the v lowpass plane of the column at src (mc12) or
 *               src+1 (mc32) with the hv plane.
 * "Combine" is a call to <OPNAME>pixels<SIZE>_l2(), which is defined elsewhere
 * (presumably a per-pixel average of its two sources -- confirm).
 *
 * Scratch buffers:
 *   full      SIZE*(SIZE+5) bytes filled by copy_block<SIZE>() starting two
 *             rows above src, with SIZE passed as the destination stride and
 *             SIZE+5 as the row count (argument order assumed to be
 *             dst, src, dstStride, srcStride, rows -- confirm).  This gives
 *             the vertical filter a tightly packed source.
 *   full_mid  full + 2*SIZE, i.e. the copied row that corresponds to src.
 *   tmp       SIZE*(SIZE+5) int16_t intermediates for the hv filter.
 *   half*     SIZE*SIZE bytes each, one filtered plane with stride SIZE.
 */
02407 #define H264_MC(OPNAME, SIZE) \
02408 static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
02409 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
02410 }\
02411 \
02412 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
02413 uint8_t half[SIZE*SIZE];\
02414 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02415 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
02416 }\
02417 \
02418 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
02419 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
02420 }\
02421 \
02422 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
02423 uint8_t half[SIZE*SIZE];\
02424 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
02425 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
02426 }\
02427 \
02428 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
02429 uint8_t full[SIZE*(SIZE+5)];\
02430 uint8_t * const full_mid= full + SIZE*2;\
02431 uint8_t half[SIZE*SIZE];\
02432 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02433 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02434 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
02435 }\
02436 \
02437 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
02438 uint8_t full[SIZE*(SIZE+5)];\
02439 uint8_t * const full_mid= full + SIZE*2;\
02440 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02441 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
02442 }\
02443 \
02444 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
02445 uint8_t full[SIZE*(SIZE+5)];\
02446 uint8_t * const full_mid= full + SIZE*2;\
02447 uint8_t half[SIZE*SIZE];\
02448 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02449 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
02450 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
02451 }\
02452 \
02453 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
02454 uint8_t full[SIZE*(SIZE+5)];\
02455 uint8_t * const full_mid= full + SIZE*2;\
02456 uint8_t halfH[SIZE*SIZE];\
02457 uint8_t halfV[SIZE*SIZE];\
02458 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02459 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02460 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02461 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02462 }\
02463 \
02464 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
02465 uint8_t full[SIZE*(SIZE+5)];\
02466 uint8_t * const full_mid= full + SIZE*2;\
02467 uint8_t halfH[SIZE*SIZE];\
02468 uint8_t halfV[SIZE*SIZE];\
02469 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02470 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02471 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02472 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02473 }\
02474 \
02475 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
02476 uint8_t full[SIZE*(SIZE+5)];\
02477 uint8_t * const full_mid= full + SIZE*2;\
02478 uint8_t halfH[SIZE*SIZE];\
02479 uint8_t halfV[SIZE*SIZE];\
02480 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02481 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02482 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02483 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02484 }\
02485 \
02486 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02487 uint8_t full[SIZE*(SIZE+5)];\
02488 uint8_t * const full_mid= full + SIZE*2;\
02489 uint8_t halfH[SIZE*SIZE];\
02490 uint8_t halfV[SIZE*SIZE];\
02491 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02492 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02493 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02494 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
02495 }\
02496 \
02497 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02498 int16_t tmp[SIZE*(SIZE+5)];\
02499 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
02500 }\
02501 \
02502 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02503 int16_t tmp[SIZE*(SIZE+5)];\
02504 uint8_t halfH[SIZE*SIZE];\
02505 uint8_t halfHV[SIZE*SIZE];\
02506 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
02507 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02508 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02509 }\
02510 \
02511 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02512 int16_t tmp[SIZE*(SIZE+5)];\
02513 uint8_t halfH[SIZE*SIZE];\
02514 uint8_t halfHV[SIZE*SIZE];\
02515 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
02516 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02517 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
02518 }\
02519 \
02520 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02521 uint8_t full[SIZE*(SIZE+5)];\
02522 uint8_t * const full_mid= full + SIZE*2;\
02523 int16_t tmp[SIZE*(SIZE+5)];\
02524 uint8_t halfV[SIZE*SIZE];\
02525 uint8_t halfHV[SIZE*SIZE];\
02526 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
02527 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02528 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02529 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02530 }\
02531 \
02532 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02533 uint8_t full[SIZE*(SIZE+5)];\
02534 uint8_t * const full_mid= full + SIZE*2;\
02535 int16_t tmp[SIZE*(SIZE+5)];\
02536 uint8_t halfV[SIZE*SIZE];\
02537 uint8_t halfHV[SIZE*SIZE];\
02538 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
02539 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
02540 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
02541 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
02542 }\
02543
02544 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02545
02546 #define op_put(a, b) a = cm[((b) + 16)>>5]
02547 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
02548 #define op2_put(a, b) a = cm[((b) + 512)>>10]
02549
02550 H264_LOWPASS(put_ , op_put, op2_put)
02551 H264_LOWPASS(avg_ , op_avg, op2_avg)
02552 H264_MC(put_, 2)
02553 H264_MC(put_, 4)
02554 H264_MC(put_, 8)
02555 H264_MC(put_, 16)
02556 H264_MC(avg_, 4)
02557 H264_MC(avg_, 8)
02558 H264_MC(avg_, 16)
02559
02560 #undef op_avg
02561 #undef op_put
02562 #undef op2_avg
02563 #undef op2_put
02564 #endif
02565
/* The 8x8 and 16x16 mc00 entries are aliases for the plain block put/avg routines. */
#define put_h264_qpel8_mc00_c   ff_put_pixels8x8_c
#define avg_h264_qpel8_mc00_c   ff_avg_pixels8x8_c
#define put_h264_qpel16_mc00_c  ff_put_pixels16x16_c
#define avg_h264_qpel16_mc00_c  ff_avg_pixels16x16_c
02570
02571 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02572 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02573 int i;
02574
02575 for(i=0; i<h; i++){
02576 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02577 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02578 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02579 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02580 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02581 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02582 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02583 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02584 dst+=dstStride;
02585 src+=srcStride;
02586 }
02587 }
02588
/** Fixed-size entry point: forwards to put_pixels8_c() with a height of 8 rows. */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 8;

    put_pixels8_c(dst, src, stride, rows);
}
/** Fixed-size entry point: forwards to avg_pixels8_c() with a height of 8 rows. */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 8;

    avg_pixels8_c(dst, src, stride, rows);
}
/** Fixed-size entry point: forwards to put_pixels16_c() with a height of 16 rows. */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 16;

    put_pixels16_c(dst, src, stride, rows);
}
/** Fixed-size entry point: forwards to avg_pixels16_c() with a height of 16 rows. */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 16;

    avg_pixels16_c(dst, src, stride, rows);
}
02601
#if CONFIG_RV40_DECODER
/*
 * RV40 mc33 entries: each one forwards to the xy2 pixel routine of the
 * matching width, with the block height equal to the width.
 */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_c(dst, src, stride, 16);
}

static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_c(dst, src, stride, 16);
}

static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_c(dst, src, stride, 8);
}

static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif
02616
02617 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02618 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02619 int i;
02620
02621 for(i=0; i<w; i++){
02622 const int src_1= src[ -srcStride];
02623 const int src0 = src[0 ];
02624 const int src1 = src[ srcStride];
02625 const int src2 = src[2*srcStride];
02626 const int src3 = src[3*srcStride];
02627 const int src4 = src[4*srcStride];
02628 const int src5 = src[5*srcStride];
02629 const int src6 = src[6*srcStride];
02630 const int src7 = src[7*srcStride];
02631 const int src8 = src[8*srcStride];
02632 const int src9 = src[9*srcStride];
02633 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02634 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02635 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02636 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02637 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02638 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02639 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02640 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02641 src++;
02642 dst++;
02643 }
02644 }
02645
/**
 * mc10: horizontally filter the 8x8 block into a packed scratch buffer, then
 * combine the unfiltered source with that buffer through put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];   /* packed, row pitch 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
02651
/** mc20: the horizontally filtered 8x8 block is written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 8;

    wmv2_mspel8_h_lowpass(dst, src, stride, stride, rows);
}
02655
/**
 * mc30: like mc10, but the horizontally filtered block is combined with the
 * source shifted one sample to the right (src+1).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];   /* packed, row pitch 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src + 1, filtered, stride, stride, 8, 8);
}
02661
/** mc02: the vertically filtered 8x8 block is written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    const int columns = 8;

    wmv2_mspel8_v_lowpass(dst, src, stride, stride, columns);
}
02665
/**
 * mc12: combine the vertically filtered source with the block that was
 * filtered horizontally and then vertically.
 *
 * The horizontal pass covers 11 rows starting one row above src, because the
 * following vertical pass needs one row above and two rows below the block.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8*11];    /* rows -1 .. 9, packed with pitch 8 */
    uint8_t v_only[8*8];
    uint8_t h_then_v[8*8];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(h_then_v, h_rows + 8, 8, 8, 8);   /* +8 skips row -1 */
    put_pixels8_l2(dst, v_only, h_then_v, stride, 8, 8, 8);
}
/**
 * mc32: same as mc12 except that the vertical-only filter runs on the source
 * shifted one sample to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8*11];    /* rows -1 .. 9, packed with pitch 8 */
    uint8_t v_only[8*8];
    uint8_t h_then_v[8*8];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(h_then_v, h_rows + 8, 8, 8, 8);   /* +8 skips row -1 */
    put_pixels8_l2(dst, v_only, h_then_v, stride, 8, 8, 8);
}
/**
 * mc22: horizontal filter over 11 rows (starting one row above src) into a
 * packed scratch buffer, then vertical filter from that buffer into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8*11];    /* rows -1 .. 9, packed with pitch 8 */

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, h_rows + 8, stride, 8, 8);   /* +8 skips row -1 */
}
02689
02690 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
02691 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
02692 int x;
02693 const int strength= ff_h263_loop_filter_strength[qscale];
02694
02695 for(x=0; x<8; x++){
02696 int d1, d2, ad1;
02697 int p0= src[x-2*stride];
02698 int p1= src[x-1*stride];
02699 int p2= src[x+0*stride];
02700 int p3= src[x+1*stride];
02701 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02702
02703 if (d<-2*strength) d1= 0;
02704 else if(d<- strength) d1=-2*strength - d;
02705 else if(d< strength) d1= d;
02706 else if(d< 2*strength) d1= 2*strength - d;
02707 else d1= 0;
02708
02709 p1 += d1;
02710 p2 -= d1;
02711 if(p1&256) p1= ~(p1>>31);
02712 if(p2&256) p2= ~(p2>>31);
02713
02714 src[x-1*stride] = p1;
02715 src[x+0*stride] = p2;
02716
02717 ad1= FFABS(d1)>>1;
02718
02719 d2= av_clip((p0-p3)/4, -ad1, ad1);
02720
02721 src[x-2*stride] = p0 - d2;
02722 src[x+ stride] = p3 + d2;
02723 }
02724 }
02725 }
02726
02727 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
02728 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
02729 int y;
02730 const int strength= ff_h263_loop_filter_strength[qscale];
02731
02732 for(y=0; y<8; y++){
02733 int d1, d2, ad1;
02734 int p0= src[y*stride-2];
02735 int p1= src[y*stride-1];
02736 int p2= src[y*stride+0];
02737 int p3= src[y*stride+1];
02738 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02739
02740 if (d<-2*strength) d1= 0;
02741 else if(d<- strength) d1=-2*strength - d;
02742 else if(d< strength) d1= d;
02743 else if(d< 2*strength) d1= 2*strength - d;
02744 else d1= 0;
02745
02746 p1 += d1;
02747 p2 -= d1;
02748 if(p1&256) p1= ~(p1>>31);
02749 if(p2&256) p2= ~(p2>>31);
02750
02751 src[y*stride-1] = p1;
02752 src[y*stride+0] = p2;
02753
02754 ad1= FFABS(d1)>>1;
02755
02756 d2= av_clip((p0-p3)/4, -ad1, ad1);
02757
02758 src[y*stride-2] = p0 - d2;
02759 src[y*stride+1] = p3 + d2;
02760 }
02761 }
02762 }
02763
/**
 * In-place separable (1,2,1) smoothing of an 8x8 block.
 *
 * Pass 1 (vertical) fills a scratch block: rows 0 and 7 are copied scaled by
 * 4, rows 1..6 get above + 2*centre + below.  Pass 2 (horizontal) writes the
 * result back: columns 0 and 7 are the scratch value rounded and divided by
 * 4, columns 1..6 are left + 2*centre + right rounded and divided by 16.
 *
 * @param src    top-left sample of the block, modified in place
 * @param stride distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8*8];
    int row, col;

    /* Vertical pass: only reads src, so the row order is irrelevant. */
    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row*stride;
        int *out = vert + row*8;

        if (row == 0 || row == 7) {
            for (col = 0; col < 8; col++)
                out[col] = 4*line[col];
        } else {
            for (col = 0; col < 8; col++)
                out[col] = line[col - stride] + 2*line[col] + line[col + stride];
        }
    }

    /* Horizontal pass: reads only the scratch block, writes src. */
    for (row = 0; row < 8; row++) {
        uint8_t *line = src + row*stride;
        const int *in = vert + row*8;

        line[0] = (in[0] + 2)>>2;
        line[7] = (in[7] + 2)>>2;
        for (col = 1; col < 7; col++)
            line[col] = (in[col - 1] + 2*in[col] + in[col + 1] + 8)>>4;
    }
}
02790
/**
 * Sum of absolute differences between two 16-sample-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return accumulated |pix1 - pix2| over 16*h samples
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02818
/**
 * Sum of absolute differences between a 16-wide block and the avg2() of each
 * pix2 sample with its right-hand neighbour.  Reads pix2[0..16] on every row.
 *
 * @param v         unused
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02846
/**
 * Sum of absolute differences between a 16-wide block and the avg2() of each
 * pix2 sample with the sample one row below it.  Reads h+1 rows of pix2.
 *
 * @param v         unused
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02876
/**
 * Sum of absolute differences between a 16-wide block and the avg4() of each
 * 2x2 neighbourhood of pix2 (sample, right, below, below-right).
 * Reads pix2[0..16] on h+1 rows.
 *
 * @param v         unused
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02906
/**
 * Sum of absolute differences between two 8-sample-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return accumulated |pix1 - pix2| over 8*h samples
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02926
/**
 * Sum of absolute differences between an 8-wide block and the avg2() of each
 * pix2 sample with its right-hand neighbour.  Reads pix2[0..8] on every row.
 *
 * @param v         unused
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02946
/**
 * Sum of absolute differences between an 8-wide block and the avg2() of each
 * pix2 sample with the sample one row below it.  Reads h+1 rows of pix2.
 *
 * @param v         unused
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02968
/**
 * Sum of absolute differences between an 8-wide block and the avg4() of each
 * 2x2 neighbourhood of pix2 (sample, right, below, below-right).
 * Reads pix2[0..8] on h+1 rows.
 *
 * @param v         unused
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            sad += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02990
02991 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
02992 MpegEncContext *c = v;
02993 int score1=0;
02994 int score2=0;
02995 int x,y;
02996
02997 for(y=0; y<h; y++){
02998 for(x=0; x<16; x++){
02999 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03000 }
03001 if(y+1<h){
03002 for(x=0; x<15; x++){
03003 score2+= FFABS( s1[x ] - s1[x +stride]
03004 - s1[x+1] + s1[x+1+stride])
03005 -FFABS( s2[x ] - s2[x +stride]
03006 - s2[x+1] + s2[x+1+stride]);
03007 }
03008 }
03009 s1+= stride;
03010 s2+= stride;
03011 }
03012
03013 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03014 else return score1 + FFABS(score2)*8;
03015 }
03016
03017 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
03018 MpegEncContext *c = v;
03019 int score1=0;
03020 int score2=0;
03021 int x,y;
03022
03023 for(y=0; y<h; y++){
03024 for(x=0; x<8; x++){
03025 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03026 }
03027 if(y+1<h){
03028 for(x=0; x<7; x++){
03029 score2+= FFABS( s1[x ] - s1[x +stride]
03030 - s1[x+1] + s1[x+1+stride])
03031 -FFABS( s2[x ] - s2[x +stride]
03032 - s2[x+1] + s2[x+1+stride]);
03033 }
03034 }
03035 s1+= stride;
03036 s2+= stride;
03037 }
03038
03039 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
03040 else return score1 + FFABS(score2)*8;
03041 }
03042
03043 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
03044 int i;
03045 unsigned int sum=0;
03046
03047 for(i=0; i<8*8; i++){
03048 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
03049 int w= weight[i];
03050 b>>= RECON_SHIFT;
03051 assert(-512<b && b<512);
03052
03053 sum += (w*b)*(w*b)>>4;
03054 }
03055 return sum>>2;
03056 }
03057
03058 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
03059 int i;
03060
03061 for(i=0; i<8*8; i++){
03062 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
03063 }
03064 }
03065
03074 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
03075 {
03076 int i;
03077 DCTELEM temp[64];
03078
03079 if(last<=0) return;
03080
03081
03082 for(i=0; i<=last; i++){
03083 const int j= scantable[i];
03084 temp[j]= block[j];
03085 block[j]=0;
03086 }
03087
03088 for(i=0; i<=last; i++){
03089 const int j= scantable[i];
03090 const int perm_j= permutation[j];
03091 block[perm_j]= temp[j];
03092 }
03093 }
03094
/** Comparison function that ignores all of its arguments and reports 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
    return 0;
}
03098
03099 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
03100 int i;
03101
03102 memset(cmp, 0, sizeof(void*)*6);
03103
03104 for(i=0; i<6; i++){
03105 switch(type&0xFF){
03106 case FF_CMP_SAD:
03107 cmp[i]= c->sad[i];
03108 break;
03109 case FF_CMP_SATD:
03110 cmp[i]= c->hadamard8_diff[i];
03111 break;
03112 case FF_CMP_SSE:
03113 cmp[i]= c->sse[i];
03114 break;
03115 case FF_CMP_DCT:
03116 cmp[i]= c->dct_sad[i];
03117 break;
03118 case FF_CMP_DCT264:
03119 cmp[i]= c->dct264_sad[i];
03120 break;
03121 case FF_CMP_DCTMAX:
03122 cmp[i]= c->dct_max[i];
03123 break;
03124 case FF_CMP_PSNR:
03125 cmp[i]= c->quant_psnr[i];
03126 break;
03127 case FF_CMP_BIT:
03128 cmp[i]= c->bit[i];
03129 break;
03130 case FF_CMP_RD:
03131 cmp[i]= c->rd[i];
03132 break;
03133 case FF_CMP_VSAD:
03134 cmp[i]= c->vsad[i];
03135 break;
03136 case FF_CMP_VSSE:
03137 cmp[i]= c->vsse[i];
03138 break;
03139 case FF_CMP_ZERO:
03140 cmp[i]= zero_cmp;
03141 break;
03142 case FF_CMP_NSSE:
03143 cmp[i]= c->nsse[i];
03144 break;
03145 #if CONFIG_DWT
03146 case FF_CMP_W53:
03147 cmp[i]= c->w53[i];
03148 break;
03149 case FF_CMP_W97:
03150 cmp[i]= c->w97[i];
03151 break;
03152 #endif
03153 default:
03154 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
03155 }
03156 }
03157 }
03158
03159 static void clear_block_c(DCTELEM *block)
03160 {
03161 memset(block, 0, sizeof(DCTELEM)*64);
03162 }
03163
03167 static void clear_blocks_c(DCTELEM *blocks)
03168 {
03169 memset(blocks, 0, sizeof(DCTELEM)*6*64);
03170 }
03171
/**
 * dst[i] += src[i] for i in [0, w), each byte wrapping modulo 256.
 *
 * The bulk of the work is done one machine word at a time: the low 7 bits of
 * every byte are added without carry into the neighbouring byte, and the top
 * bit is patched in with an XOR.  Leftover bytes are handled one by one.
 *
 * The word-loop bound is computed in signed arithmetic.  With the unsigned
 * `w - sizeof(long)` a width smaller than one word wrapped around to a huge
 * value and the loop ran far past both buffers.
 *
 * @param dst destination, updated in place
 * @param src bytes to add
 * @param w   number of bytes; values <= 0 leave dst untouched
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    /* Same per-byte masks as the file-level pb_7f / pb_80. */
    const unsigned long low7 = ~0UL/255 * 0x7f;
    const unsigned long top1 = ~0UL/255 * 0x80;
    long i;

    for (i = 0; i <= w - (int)sizeof(long); i += sizeof(long)) {
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&low7) + (b&low7)) ^ ((a^b)&top1);
    }
    for (; i < w; i++)
        dst[i] += src[i];
}
03182
/**
 * dst[i] = src1[i] + src2[i] for i in [0, w), each byte wrapping modulo 256.
 *
 * Works one machine word at a time (low 7 bits added, top bit patched in
 * with XOR) and finishes the remainder bytewise.
 *
 * The word-loop bound is computed in signed arithmetic.  With the unsigned
 * `w - sizeof(long)` a width smaller than one word wrapped around to a huge
 * value and the loop ran far past the buffers.
 *
 * @param dst  destination
 * @param src1 first operand
 * @param src2 second operand
 * @param w    number of bytes; values <= 0 leave dst untouched
 */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* Same per-byte masks as the file-level pb_7f / pb_80. */
    const unsigned long low7 = ~0UL/255 * 0x7f;
    const unsigned long top1 = ~0UL/255 * 0x80;
    long i;

    for (i = 0; i <= w - (int)sizeof(long); i += sizeof(long)) {
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&low7) + (b&low7)) ^ ((a^b)&top1);
    }
    for (; i < w; i++)
        dst[i] = src1[i]+src2[i];
}
03193
/**
 * dst[i] = src1[i] - src2[i] for i in [0, w), each byte wrapping modulo 256.
 *
 * When fast unaligned access is not available and src2 is not word aligned,
 * the bulk is processed bytewise in groups of 8.  Otherwise it is processed
 * one machine word at a time: the top bit of every minuend byte is forced on
 * so no borrow crosses a byte boundary, and the true top bit is restored
 * with an XOR.  Leftover bytes are handled one by one in both cases.
 *
 * The word-loop bound is computed in signed arithmetic.  With the unsigned
 * `w - sizeof(long)` a width smaller than one word wrapped around to a huge
 * value and the loop ran far past the buffers.
 *
 * @param dst  destination
 * @param src1 minuend
 * @param src2 subtrahend
 * @param w    number of bytes; values <= 0 leave dst untouched
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* Same per-byte masks as the file-level pb_7f / pb_80. */
    const unsigned long low7 = ~0UL/255 * 0x7f;
    const unsigned long top1 = ~0UL/255 * 0x80;
    long i;
#if !HAVE_FAST_UNALIGNED
    if ((long)src2 & (sizeof(long)-1)) {
        for (i = 0; i+7 < w; i += 8) {
            int k;
            for (k = 0; k < 8; k++)
                dst[i+k] = src1[i+k]-src2[i+k];
        }
    } else
#endif
    for (i = 0; i <= w - (int)sizeof(long); i += sizeof(long)) {
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|top1) - (b&low7)) ^ ((a^b^top1)&top1);
    }
    for (; i < w; i++)
        dst[i] = src1[i]-src2[i];
}
03218
/**
 * Reconstruct a row from residuals using a median predictor.
 *
 * For every position the predictor is mid_pred() of the running left value,
 * src1[i], and (left + src1[i] - left_top) masked to 8 bits; diff[i] is
 * added and the 8-bit result is stored and becomes the new left value.
 *
 * @param dst      reconstructed output row
 * @param src1     reference row
 * @param diff     residuals
 * @param w        number of samples
 * @param left     in: initial left value, out: last reconstructed sample
 * @param left_top in: initial top-left value, out: last src1 sample used
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur  = *left;       /* both deliberately wrap at 8 bits */
    uint8_t diag = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int gradient = (cur + src1[i] - diag)&0xFF;

        cur    = mid_pred(cur, src1[i], gradient) + diff[i];
        diag   = src1[i];
        dst[i] = cur;
    }

    *left     = cur;
    *left_top = diag;
}
03235
/**
 * Compute median-prediction residuals for a row.
 *
 * For every position the predictor is mid_pred() of the running left value,
 * src1[i], and (left + src1[i] - left_top) masked to 8 bits; the stored
 * residual is src2[i] minus that predictor, and src2[i] becomes the new left
 * value.
 *
 * @param dst      residual output row
 * @param src1     reference row
 * @param src2     row being predicted
 * @param w        number of samples
 * @param left     in: initial left value, out: last src2 sample used
 * @param left_top in: initial top-left value, out: last src1 sample used
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur  = *left;
    uint8_t diag = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int gradient  = (cur + src1[i] - diag)&0xFF;
        const int predicted = mid_pred(cur, src1[i], gradient);

        diag   = src1[i];
        cur    = src2[i];
        dst[i] = cur - predicted;
    }

    *left     = cur;
    *left_top = diag;
}
03253
/**
 * Running sum along a row: acc += src[i]; dst[i] = acc (truncated to 8 bits
 * on store).
 *
 * @param dst output row
 * @param src values to accumulate
 * @param w   number of samples
 * @param acc initial accumulator
 * @return the accumulator after the last sample, not truncated to 8 bits
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    const uint8_t *in  = src;
    uint8_t       *out = dst;
    int remaining;

    for (remaining = w; remaining > 0; remaining--) {
        acc   += *in++;
        *out++ = acc;
    }

    return acc;
}
03272
/* Byte offset of every channel inside a 4-byte pixel; depends on endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Per-channel running sum over a row of 4-byte pixels: each channel has its
 * own accumulator, and every output byte is the accumulator truncated to
 * 8 bits.
 *
 * @param dst   output row, 4 bytes per pixel
 * @param src   values to accumulate, 4 bytes per pixel
 * @param w     number of pixels
 * @param red   in: initial accumulator, out: final accumulator (not truncated)
 * @param green likewise for the green channel
 * @param blue  likewise for the blue channel
 * @param alpha likewise for the alpha channel
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int acc_r = *red;
    int acc_g = *green;
    int acc_b = *blue;
    int acc_a = *alpha;
    int i;

    for (i = 0; i < w; i++) {
        const uint8_t *in  = src + 4*i;
        uint8_t       *out = dst + 4*i;

        acc_b += in[B];
        acc_g += in[G];
        acc_r += in[R];
        acc_a += in[A];

        out[B] = acc_b;
        out[G] = acc_g;
        out[R] = acc_r;
        out[A] = acc_a;
    }

    *red   = acc_r;
    *green = acc_g;
    *blue  = acc_b;
    *alpha = acc_a;
}
#undef B
#undef G
#undef R
#undef A
03313
/*
 * Butterfly helpers for the Hadamard transforms below.
 *
 * BUTTERFLY2 and BUTTERFLY1 expand to a single do { } while (0) statement so
 * that "BUTTERFLYn(...);" is safe under an unbraced if/else.  BUTTERFLY1's
 * temporaries carry a bf_ prefix so that caller variables (e.g. ones named
 * a or b) cannot be captured.
 */

/* o1 = i1 + i2, o2 = i1 - i2.  i1 and i2 are evaluated twice. */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1) = (i1)+(i2);\
    (o2) = (i1)-(i2);\
} while (0)

/* In-place butterfly: x becomes x + y and y becomes the old x - y. */
#define BUTTERFLY1(x,y) \
do {\
    const int bf_first_  = (x);\
    const int bf_second_ = (y);\
    (x) = bf_first_ + bf_second_;\
    (y) = bf_first_ - bf_second_;\
} while (0)

/* |x + y| + |x - y|: a final butterfly stage folded into the absolute sum. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
03328
/**
 * Sum of absolute values of the 8x8 Hadamard transform of (src - dst).
 *
 * The transform is applied along rows first, then along columns; the last
 * column stage is folded into the absolute-value sum through BUTTERFLYA.
 *
 * @param s      unused
 * @param dst    first 8x8 block
 * @param src    second 8x8 block
 * @param stride distance between rows, shared by both blocks
 * @param h      must be 8
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* Row pass: three butterfly stages over the 8 differences of each row. */
    for (i = 0; i < 8; i++) {
        int *const row = temp + 8*i;
        const uint8_t *const ps = src + stride*i;
        const uint8_t *const pd = dst + stride*i;

        BUTTERFLY2(row[0], row[1], ps[0]-pd[0], ps[1]-pd[1]);
        BUTTERFLY2(row[2], row[3], ps[2]-pd[2], ps[3]-pd[3]);
        BUTTERFLY2(row[4], row[5], ps[4]-pd[4], ps[5]-pd[5]);
        BUTTERFLY2(row[6], row[7], ps[6]-pd[6], ps[7]-pd[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* Column pass: two explicit stages, the third one inside BUTTERFLYA. */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }
    return sum;
}
03380
/**
 * Sum of absolute values of the 8x8 Hadamard transform of src itself, with
 * the term |temp[0] + temp[32]| (the first BUTTERFLYA sum of column 0)
 * subtracted again at the end.
 *
 * @param s      unused
 * @param src    8x8 block
 * @param dummy  unused
 * @param stride distance between rows
 * @param h      must be 8
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* Row pass: three butterfly stages over the 8 samples of each row. */
    for (i = 0; i < 8; i++) {
        int *const row = temp + 8*i;
        const uint8_t *const px = src + stride*i;

        BUTTERFLY2(row[0], row[1], px[0], px[1]);
        BUTTERFLY2(row[2], row[3], px[2], px[3]);
        BUTTERFLY2(row[4], row[5], px[4], px[5]);
        BUTTERFLY2(row[6], row[7], px[6], px[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* Column pass: two explicit stages, the third one inside BUTTERFLYA. */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
              +BUTTERFLYA(col[8*1], col[8*5])
              +BUTTERFLYA(col[8*2], col[8*6])
              +BUTTERFLYA(col[8*3], col[8*7]);
    }

    /* NOTE(review): presumably removes the DC (mean) contribution - confirm. */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
03428
03429 static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03430 MpegEncContext * const s= (MpegEncContext *)c;
03431 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03432
03433 assert(h==8);
03434
03435 s->dsp.diff_pixels(temp, src1, src2, stride);
03436 s->dsp.fdct(temp);
03437 return s->dsp.sum_abs_dctelem(temp);
03438 }
03439
#if CONFIG_GPL
/*
 * One 8-point integer transform pass.  Input sample n is read through
 * SRC(n) and output coefficient n is delivered through DST(n, value); both
 * macros are supplied at the expansion site.  All inputs are read into
 * constants before the first DST, so SRC and DST may address the same
 * storage.
 */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * Difference two 8x8 blocks, apply DCT8_1D along one axis in place and then
 * along the other, and return the sum of absolute values of the second-pass
 * outputs.
 *
 * @param c MpegEncContext supplying dsp.diff_pixels
 * @param h not examined by this function
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext *const enc = (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int sad = 0;
    int i;

    enc->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* First pass: transform dct[i][0..7] in place. */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    /* Second pass: transform dct[0..7][i]; outputs are only accumulated. */
#define SRC(x) dct[x][i]
#define DST(x,v) sad += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    return sad;
}
#endif
03492
03493 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03494 MpegEncContext * const s= (MpegEncContext *)c;
03495 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03496 int sum=0, i;
03497
03498 assert(h==8);
03499
03500 s->dsp.diff_pixels(temp, src1, src2, stride);
03501 s->dsp.fdct(temp);
03502
03503 for(i=0; i<64; i++)
03504 sum= FFMAX(sum, FFABS(temp[i]));
03505
03506 return sum;
03507 }
03508
03509 static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03510 MpegEncContext * const s= (MpegEncContext *)c;
03511 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
03512 DCTELEM * const bak = temp+64;
03513 int sum=0, i;
03514
03515 assert(h==8);
03516 s->mb_intra=0;
03517
03518 s->dsp.diff_pixels(temp, src1, src2, stride);
03519
03520 memcpy(bak, temp, 64*sizeof(DCTELEM));
03521
03522 s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03523 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03524 ff_simple_idct(temp);
03525
03526 for(i=0; i<64; i++)
03527 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
03528
03529 return sum;
03530 }
03531
03532 static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03533 MpegEncContext * const s= (MpegEncContext *)c;
03534 const uint8_t *scantable= s->intra_scantable.permutated;
03535 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03536 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
03537 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
03538 int i, last, run, bits, level, distortion, start_i;
03539 const int esc_length= s->ac_esc_length;
03540 uint8_t * length;
03541 uint8_t * last_length;
03542
03543 assert(h==8);
03544
03545 copy_block8(lsrc1, src1, 8, stride, 8);
03546 copy_block8(lsrc2, src2, 8, stride, 8);
03547
03548 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
03549
03550 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03551
03552 bits=0;
03553
03554 if (s->mb_intra) {
03555 start_i = 1;
03556 length = s->intra_ac_vlc_length;
03557 last_length= s->intra_ac_vlc_last_length;
03558 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03559 } else {
03560 start_i = 0;
03561 length = s->inter_ac_vlc_length;
03562 last_length= s->inter_ac_vlc_last_length;
03563 }
03564
03565 if(last>=start_i){
03566 run=0;
03567 for(i=start_i; i<last; i++){
03568 int j= scantable[i];
03569 level= temp[j];
03570
03571 if(level){
03572 level+=64;
03573 if((level&(~127)) == 0){
03574 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03575 }else
03576 bits+= esc_length;
03577 run=0;
03578 }else
03579 run++;
03580 }
03581 i= scantable[last];
03582
03583 level= temp[i] + 64;
03584
03585 assert(level - 64);
03586
03587 if((level&(~127)) == 0){
03588 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03589 }else
03590 bits+= esc_length;
03591
03592 }
03593
03594 if(last>=0){
03595 if(s->mb_intra)
03596 s->dct_unquantize_intra(s, temp, 0, s->qscale);
03597 else
03598 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03599 }
03600
03601 s->dsp.idct_add(lsrc2, 8, temp);
03602
03603 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
03604
03605 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
03606 }
03607
03608 static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03609 MpegEncContext * const s= (MpegEncContext *)c;
03610 const uint8_t *scantable= s->intra_scantable.permutated;
03611 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
03612 int i, last, run, bits, level, start_i;
03613 const int esc_length= s->ac_esc_length;
03614 uint8_t * length;
03615 uint8_t * last_length;
03616
03617 assert(h==8);
03618
03619 s->dsp.diff_pixels(temp, src1, src2, stride);
03620
03621 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03622
03623 bits=0;
03624
03625 if (s->mb_intra) {
03626 start_i = 1;
03627 length = s->intra_ac_vlc_length;
03628 last_length= s->intra_ac_vlc_last_length;
03629 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03630 } else {
03631 start_i = 0;
03632 length = s->inter_ac_vlc_length;
03633 last_length= s->inter_ac_vlc_last_length;
03634 }
03635
03636 if(last>=start_i){
03637 run=0;
03638 for(i=start_i; i<last; i++){
03639 int j= scantable[i];
03640 level= temp[j];
03641
03642 if(level){
03643 level+=64;
03644 if((level&(~127)) == 0){
03645 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03646 }else
03647 bits+= esc_length;
03648 run=0;
03649 }else
03650 run++;
03651 }
03652 i= scantable[last];
03653
03654 level= temp[i] + 64;
03655
03656 assert(level - 64);
03657
03658 if((level&(~127)) == 0){
03659 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03660 }else
03661 bits+= esc_length;
03662 }
03663
03664 return bits;
03665 }
03666
/**
 * Generates vsad_intra<size>_c(): the sum of absolute differences between
 * every pixel and the pixel directly below it, over the first 'size'
 * columns and h rows (h - 1 row pairs). The c and dummy arguments are unused.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(void *c, uint8_t *s, uint8_t *dummy, int stride, int h) \
{ \
    int total = 0; \
    int row, col; \
 \
    for (row = 1; row < h; row++, s += stride) \
        for (col = 0; col < size; col++) \
            total += FFABS(s[col] - s[col + stride]); \
 \
    return total; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
03684
/**
 * Vertical SAD of the difference signal s1 - s2 for a 16 pixel wide block:
 * sums |(s1 - s2)[row] - (s1 - s2)[row + 1]| over 16 columns and h - 1 row
 * pairs. The c argument is unused.
 */
static int vsad16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++, s1 += stride, s2 += stride)
        for (col = 0; col < 16; col++)
            total += FFABS(s1[col] - s2[col] - s1[col + stride] + s2[col + stride]);

    return total;
}
03699
/** Square of a; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))

/**
 * Generates vsse_intra<size>_c(): the sum of squared differences between
 * every pixel and the pixel directly below it, over the first 'size'
 * columns and h rows (h - 1 row pairs). The c and dummy arguments are unused.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(void *c, uint8_t *s, uint8_t *dummy, int stride, int h) \
{ \
    int total = 0; \
    int row, col; \
 \
    for (row = 1; row < h; row++, s += stride) \
        for (col = 0; col < size; col++) \
            total += SQ(s[col] - s[col + stride]); \
 \
    return total; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
03718
/**
 * Vertical SSE of the difference signal s1 - s2 for a 16 pixel wide block:
 * sums ((s1 - s2)[row] - (s1 - s2)[row + 1])^2 over 16 columns and h - 1
 * row pairs. The c argument is unused.
 */
static int vsse16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++, s1 += stride, s2 += stride)
        for (col = 0; col < 16; col++)
            total += SQ(s1[col] - s2[col] - s1[col + stride] + s2[col + stride]);

    return total;
}
03733
/**
 * Sum of squared differences between an int8_t and an int16_t vector.
 *
 * @param pix1 first vector, size elements
 * @param pix2 second vector, size elements
 * @param size number of elements compared
 * @return sum over i of (pix1[i] - pix2[i])^2
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int total = 0;
    int k;

    for (k = 0; k < size; k++) {
        const int d = pix1[k] - pix2[k];
        total += d * d;
    }
    return total;
}
03742
/* Instantiate WRAPPER8_16_SQ for each 8x8 comparison function, pairing it
 * with a second name ending in 16_c. The macro is defined outside this
 * section; presumably it builds the 16-pixel variant from calls to the 8x8
 * one -- confirm against its definition. The dct264 pair exists only when
 * CONFIG_GPL is set. */
03743 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
03744 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
03745 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
03746 #if CONFIG_GPL
03747 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
03748 #endif
03749 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
03750 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
03751 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
03752 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
03753
/**
 * In-place element-wise product: dst[k] = dst[k] * src[k] for k < len.
 */
static void vector_fmul_c(float *dst, const float *src, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        dst[k] = dst[k] * src[k];
    }
}
03759
/**
 * Element-wise product of src0 with src1 read back to front:
 * dst[k] = src0[k] * src1[len - 1 - k] for k < len.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
{
    const int tail = len - 1;
    int k;

    for (k = 0; k < len; k++) {
        dst[k] = src0[k] * src1[tail - k];
    }
}
03766
/**
 * Multiply-add of three vectors: dst[k] = src0[k] * src1[k] + src2[k]
 * for k < len.
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        dst[k] = src0[k] * src1[k] + src2[k];
    }
}
03772
/**
 * Windowed combination of two len-sample inputs into 2*len outputs.
 *
 * For k in [0, len) and m = len - 1 - k:
 *   dst[k]       = src0[k] * win[len + m] - src1[m] * win[k]       + add_bias
 *   dst[len + m] = src0[k] * win[k]       + src1[m] * win[len + m] + add_bias
 *
 * @param dst      output, 2 * len elements
 * @param src0     first input, len elements, read front to back
 * @param src1     second input, len elements, read back to front
 * @param win      window, 2 * len elements
 * @param add_bias constant added to every output
 * @param len      number of input samples per source
 */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int m  = len - 1 - k;
        const float s0 = src0[k];
        const float s1 = src1[m];
        const float wi = win[k];
        const float wj = win[len + m];

        dst[k]       = s0*wj - s1*wi + add_bias;
        dst[len + m] = s0*wi + s1*wj + add_bias;
    }
}
03787
/**
 * Scale a vector by a constant: dst[k] = src[k] * mul for k < len.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k;

    for (k = 0; k < len; k++) {
        dst[k] = src[k] * mul;
    }
}
03795
/**
 * Multiply src by a sequence of 2-element vectors and a scalar.
 *
 * Each pointer in sv supplies two factors; output pair p is
 * src[2p + j] * sv[p][j] * mul for j = 0, 1. The loop advances two
 * elements at a time until the index reaches len.
 */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int pos;

    for (pos = 0; pos < len; pos += 2) {
        dst[pos    ] = src[pos    ] * sv[0][0] * mul;
        dst[pos + 1] = src[pos + 1] * sv[0][1] * mul;
        sv++;
    }
}
03805
/**
 * Multiply src by a sequence of 4-element vectors and a scalar.
 *
 * Each pointer in sv supplies four factors; output group p is
 * src[4p + j] * sv[p][j] * mul for j = 0..3. The loop advances four
 * elements at a time until the index reaches len.
 */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int pos;

    for (pos = 0; pos < len; pos += 4) {
        dst[pos    ] = src[pos    ] * sv[0][0] * mul;
        dst[pos + 1] = src[pos + 1] * sv[0][1] * mul;
        dst[pos + 2] = src[pos + 2] * sv[0][2] * mul;
        dst[pos + 3] = src[pos + 3] * sv[0][3] * mul;
        sv++;
    }
}
03817
/**
 * Expand a sequence of 2-element vectors scaled by mul:
 * output pair p is sv[p][j] * mul for j = 0, 1. The loop advances two
 * elements at a time until the index reaches len.
 */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int pos;

    for (pos = 0; pos < len; pos += 2) {
        dst[pos    ] = sv[0][0] * mul;
        dst[pos + 1] = sv[0][1] * mul;
        sv++;
    }
}
03827
/**
 * Expand a sequence of 4-element vectors scaled by mul:
 * output group p is sv[p][j] * mul for j = 0..3. The loop advances four
 * elements at a time until the index reaches len.
 */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int pos;

    for (pos = 0; pos < len; pos += 4) {
        dst[pos    ] = sv[0][0] * mul;
        dst[pos + 1] = sv[0][1] * mul;
        dst[pos + 2] = sv[0][2] * mul;
        dst[pos + 3] = sv[0][3] * mul;
        sv++;
    }
}
03839
/**
 * In-place butterfly over two non-overlapping vectors:
 * v1[k] becomes v1[k] + v2[k] and v2[k] becomes the old v1[k] - v2[k].
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float a = v1[k];
        const float b = v2[k];

        v1[k] = a + b;
        v2[k] = a - b;
    }
}
03850
/**
 * Dot product of two float vectors, accumulated in a float from index 0
 * upwards.
 *
 * @return sum over k < len of v1[k] * v2[k]; 0 when len <= 0
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k;

    for (k = 0; k < len; k++) {
        acc += v1[k] * v2[k];
    }
    return acc;
}
03861
/**
 * Convert integers to float and scale them: dst[k] = src[k] * mul
 * for k < len.
 */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        dst[k] = src[k] * mul;
    }
}
03867
/**
 * Clip one float, given as its 32-bit pattern, to [min, max] where
 * min < 0 < max, using unsigned integer comparisons only.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with the sign bit flipped
 * @return bit pattern of the clipped value
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* Both have the sign bit set only when a is negative as well, so a
     * larger pattern means a larger magnitude, i.e. a value below min. */
    if (a > mini)
        return mini;
    /* Flipping the sign bit ranks positive values above negative ones.
     * 1U keeps the shift in unsigned arithmetic; 1<<31 overflows int. */
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    else
        return a;
}

/**
 * Clip a float vector to [*min, *max] for the case *min < 0 < *max by
 * operating on the raw bit patterns of the floats.
 *
 * Elements are processed in groups of eight, so the last group is always
 * handled in full; presumably callers pass a len that is a multiple of 8
 * -- confirm against callers.
 *
 * @param dst output vector
 * @param src input vector
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
{
    int i, k;
    uint32_t mini        = *(uint32_t*)min;
    uint32_t maxi        = *(uint32_t*)max;
    uint32_t maxisign    = maxi ^ (1U << 31);
    uint32_t *dsti       = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8) {
        for (k = 0; k < 8; k++)
            dsti[i + k] = clipf_c_one(srci[i + k], mini, maxi, maxisign);
    }
}
/**
 * Clip every element of src to [min, max] and store the result in dst.
 *
 * When the bounds straddle zero the work is handed to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied per
 * element. Elements are handled in groups of eight, so the last group is
 * always processed in full.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
{
    int base, k;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (base = 0; base < len; base += 8) {
        for (k = 0; k < 8; k++)
            dst[base + k] = av_clipf(src[base + k], min, max);
    }
}
03912
03913 static av_always_inline int float_to_int16_one(const float *src){
03914 int_fast32_t tmp = *(const int32_t*)src;
03915 if(tmp & 0xf0000){
03916 tmp = (0x43c0ffff - tmp)>>31;
03917
03918
03919
03920 }
03921 return tmp - 0x8000;
03922 }
03923
/**
 * Convert len floats to int16_t samples with float_to_int16_one().
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, long len)
{
    int n;

    for (n = 0; n < len; n++) {
        dst[n] = float_to_int16_one(&src[n]);
    }
}
03929
/**
 * Convert planar float channels to interleaved int16_t samples.
 *
 * Sample i of channel c is written to dst[i * channels + c]. The
 * two-channel case has its own loop that writes both channels per step.
 *
 * @param dst      interleaved output, len * channels samples
 * @param src      one input pointer per channel
 * @param len      samples per channel
 * @param channels number of channels
 */
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels)
{
    int n, out, ch;

    if (channels != 2) {
        for (ch = 0; ch < channels; ch++) {
            out = ch;
            for (n = 0; n < len; n++) {
                dst[out] = float_to_int16_one(src[ch] + n);
                out += channels;
            }
        }
        return;
    }

    for (n = 0; n < len; n++) {
        dst[2*n    ] = float_to_int16_one(src[0] + n);
        dst[2*n + 1] = float_to_int16_one(src[1] + n);
    }
}
03943
/**
 * Dot product of two int16_t vectors; each product is shifted right by
 * 'shift' before being accumulated.
 *
 * @param order number of elements
 * @param shift right shift applied to every product
 * @return sum over k < order of (v1[k] * v2[k]) >> shift
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;
    int k   = 0;

    for (; order != 0; order--, k++) {
        acc += (v1[k] * v2[k]) >> shift;
    }

    return acc;
}
03953
/**
 * Dot product of v1 and v2 combined with an in-place multiply-add on v1.
 *
 * For every element the product v1[k] * v2[k] is accumulated using the
 * old v1[k], after which v1[k] is increased by mul * v3[k].
 *
 * @param order number of elements
 * @return sum over k < order of old v1[k] * v2[k]
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;
    int k   = 0;

    for (; order != 0; order--, k++) {
        acc   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
    }
    return acc;
}
03963
/* Fixed-point weights used by the WMV2 inverse DCT below. */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/**
 * Row pass of the WMV2 IDCT: transforms the 8 consecutive coefficients at
 * b in place. Results are rounded with 1<<7 and shifted right by 8.
 */
static void wmv2_idct_row(short * b)
{
#define ROW_OUT(x) (((x) + (1<<7))>>8)
    int s1, s2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* even inputs */
    a0 = W0*b[0] + W0*b[4];
    a4 = W0*b[0] - W0*b[4];
    a2 = W2*b[2] + W6*b[6];
    a6 = W6*b[2] - W2*b[6];
    /* odd inputs */
    a1 = W1*b[1] + W7*b[7];
    a7 = W7*b[1] - W1*b[7];
    a5 = W5*b[5] + W3*b[3];
    a3 = W3*b[5] - W5*b[3];

    /* 181/256 is roughly 1/sqrt(2) */
    s1 = (181*(a1 - a5 + a7 - a3) + 128)>>8;
    s2 = (181*(a1 - a5 - a7 + a3) + 128)>>8;

    b[0] = ROW_OUT(a0 + a2 + a1 + a5);
    b[1] = ROW_OUT(a4 + a6 + s1);
    b[2] = ROW_OUT(a4 - a6 + s2);
    b[3] = ROW_OUT(a0 - a2 + a7 + a3);
    b[4] = ROW_OUT(a0 - a2 - a7 - a3);
    b[5] = ROW_OUT(a4 - a6 - s2);
    b[6] = ROW_OUT(a4 + a6 - s1);
    b[7] = ROW_OUT(a0 + a2 - a1 - a5);
#undef ROW_OUT
}

/**
 * Column pass of the WMV2 IDCT: transforms the 8 coefficients at b with a
 * stride of 8 in place. Intermediate terms are shifted right by 3 (with
 * +4 rounding on all but a0/a4); results are rounded with 1<<13 and
 * shifted right by 14.
 */
static void wmv2_idct_col(short * b)
{
#define COL_OUT(x) (((x) + (1<<13))>>14)
    int s1, s2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* even inputs */
    a0 = (W0*b[8*0] + W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0] - W0*b[8*4]    )>>3;
    a2 = (W2*b[8*2] + W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2] - W2*b[8*6] + 4)>>3;
    /* odd inputs */
    a1 = (W1*b[8*1] + W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1] - W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5] + W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5] - W5*b[8*3] + 4)>>3;

    /* 181/256 is roughly 1/sqrt(2) */
    s1 = (181*(a1 - a5 + a7 - a3) + 128)>>8;
    s2 = (181*(a1 - a5 - a7 + a3) + 128)>>8;

    b[8*0] = COL_OUT(a0 + a2 + a1 + a5);
    b[8*1] = COL_OUT(a4 + a6 + s1);
    b[8*2] = COL_OUT(a4 - a6 + s2);
    b[8*3] = COL_OUT(a0 - a2 + a7 + a3);

    b[8*4] = COL_OUT(a0 - a2 - a7 - a3);
    b[8*5] = COL_OUT(a4 - a6 - s2);
    b[8*6] = COL_OUT(a4 + a6 - s1);
    b[8*7] = COL_OUT(a0 + a2 - a1 - a5);
#undef COL_OUT
}

/**
 * In-place 8x8 WMV2 inverse DCT: all eight rows first, then all eight
 * columns.
 *
 * @param block 64 coefficients in row-major order
 */
void ff_wmv2_idct_c(short * block)
{
    int n;

    for (n = 0; n < 8; n++)
        wmv2_idct_row(block + 8*n);

    for (n = 0; n < 8; n++)
        wmv2_idct_col(block + n);
}
04036
04037
04038 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
04039 {
04040 ff_wmv2_idct_c(block);
04041 put_pixels_clamped_c(block, dest, line_size);
04042 }
04043 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
04044 {
04045 ff_wmv2_idct_c(block);
04046 add_pixels_clamped_c(block, dest, line_size);
04047 }
04048 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
04049 {
04050 j_rev_dct (block);
04051 put_pixels_clamped_c(block, dest, line_size);
04052 }
04053 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
04054 {
04055 j_rev_dct (block);
04056 add_pixels_clamped_c(block, dest, line_size);
04057 }
04058
04059 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
04060 {
04061 j_rev_dct4 (block);
04062 put_pixels_clamped4_c(block, dest, line_size);
04063 }
04064 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
04065 {
04066 j_rev_dct4 (block);
04067 add_pixels_clamped4_c(block, dest, line_size);
04068 }
04069
04070 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
04071 {
04072 j_rev_dct2 (block);
04073 put_pixels_clamped2_c(block, dest, line_size);
04074 }
04075 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
04076 {
04077 j_rev_dct2 (block);
04078 add_pixels_clamped2_c(block, dest, line_size);
04079 }
04080
04081 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
04082 {
04083 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
04084
04085 dest[0] = cm[(block[0] + 4)>>3];
04086 }
04087 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
04088 {
04089 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
04090
04091 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
04092 }
04093
04094 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
04095
04096
04097 av_cold void dsputil_static_init(void)
04098 {
04099 int i;
04100
04101 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
04102 for(i=0;i<MAX_NEG_CROP;i++) {
04103 ff_cropTbl[i] = 0;
04104 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
04105 }
04106
04107 for(i=0;i<512;i++) {
04108 ff_squareTbl[i] = (i - 256) * (i - 256);
04109 }
04110
04111 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
04112 }
04113
04114 int ff_check_alignment(void){
04115 static int did_fail=0;
04116 DECLARE_ALIGNED(16, int, aligned);
04117
04118 if((intptr_t)&aligned & 15){
04119 if(!did_fail){
04120 #if HAVE_MMX || HAVE_ALTIVEC
04121 av_log(NULL, AV_LOG_ERROR,
04122 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
04123 "and may be very slow or crash. This is not a bug in libavcodec,\n"
04124 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
04125 "Do not report crashes to FFmpeg developers.\n");
04126 #endif
04127 did_fail=1;
04128 }
04129 return -1;
04130 }
04131 return 0;
04132 }
04133
04134 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
04135 {
04136 int i;
04137
04138 ff_check_alignment();
04139
04140 #if CONFIG_ENCODERS
04141 if(avctx->dct_algo==FF_DCT_FASTINT) {
04142 c->fdct = fdct_ifast;
04143 c->fdct248 = fdct_ifast248;
04144 }
04145 else if(avctx->dct_algo==FF_DCT_FAAN) {
04146 c->fdct = ff_faandct;
04147 c->fdct248 = ff_faandct248;
04148 }
04149 else {
04150 c->fdct = ff_jpeg_fdct_islow;
04151 c->fdct248 = ff_fdct248_islow;
04152 }
04153 #endif //CONFIG_ENCODERS
04154
04155 if(avctx->lowres==1){
04156 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
04157 c->idct_put= ff_jref_idct4_put;
04158 c->idct_add= ff_jref_idct4_add;
04159 }else{
04160 c->idct_put= ff_h264_lowres_idct_put_c;
04161 c->idct_add= ff_h264_lowres_idct_add_c;
04162 }
04163 c->idct = j_rev_dct4;
04164 c->idct_permutation_type= FF_NO_IDCT_PERM;
04165 }else if(avctx->lowres==2){
04166 c->idct_put= ff_jref_idct2_put;
04167 c->idct_add= ff_jref_idct2_add;
04168 c->idct = j_rev_dct2;
04169 c->idct_permutation_type= FF_NO_IDCT_PERM;
04170 }else if(avctx->lowres==3){
04171 c->idct_put= ff_jref_idct1_put;
04172 c->idct_add= ff_jref_idct1_add;
04173 c->idct = j_rev_dct1;
04174 c->idct_permutation_type= FF_NO_IDCT_PERM;
04175 }else{
04176 if(avctx->idct_algo==FF_IDCT_INT){
04177 c->idct_put= ff_jref_idct_put;
04178 c->idct_add= ff_jref_idct_add;
04179 c->idct = j_rev_dct;
04180 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
04181 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
04182 avctx->idct_algo==FF_IDCT_VP3){
04183 c->idct_put= ff_vp3_idct_put_c;
04184 c->idct_add= ff_vp3_idct_add_c;
04185 c->idct = ff_vp3_idct_c;
04186 c->idct_permutation_type= FF_NO_IDCT_PERM;
04187 }else if(avctx->idct_algo==FF_IDCT_WMV2){
04188 c->idct_put= ff_wmv2_idct_put_c;
04189 c->idct_add= ff_wmv2_idct_add_c;
04190 c->idct = ff_wmv2_idct_c;
04191 c->idct_permutation_type= FF_NO_IDCT_PERM;
04192 }else if(avctx->idct_algo==FF_IDCT_FAAN){
04193 c->idct_put= ff_faanidct_put;
04194 c->idct_add= ff_faanidct_add;
04195 c->idct = ff_faanidct;
04196 c->idct_permutation_type= FF_NO_IDCT_PERM;
04197 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
04198 c->idct_put= ff_ea_idct_put_c;
04199 c->idct_permutation_type= FF_NO_IDCT_PERM;
04200 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
04201 c->idct = ff_bink_idct_c;
04202 c->idct_add = ff_bink_idct_add_c;
04203 c->idct_put = ff_bink_idct_put_c;
04204 c->idct_permutation_type = FF_NO_IDCT_PERM;
04205 }else{
04206 c->idct_put= ff_simple_idct_put;
04207 c->idct_add= ff_simple_idct_add;
04208 c->idct = ff_simple_idct;
04209 c->idct_permutation_type= FF_NO_IDCT_PERM;
04210 }
04211 }
04212
04213 c->get_pixels = get_pixels_c;
04214 c->diff_pixels = diff_pixels_c;
04215 c->put_pixels_clamped = put_pixels_clamped_c;
04216 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
04217 c->put_pixels_nonclamped = put_pixels_nonclamped_c;
04218 c->add_pixels_clamped = add_pixels_clamped_c;
04219 c->add_pixels8 = add_pixels8_c;
04220 c->add_pixels4 = add_pixels4_c;
04221 c->sum_abs_dctelem = sum_abs_dctelem_c;
04222 c->gmc1 = gmc1_c;
04223 c->gmc = ff_gmc_c;
04224 c->clear_block = clear_block_c;
04225 c->clear_blocks = clear_blocks_c;
04226 c->pix_sum = pix_sum_c;
04227 c->pix_norm1 = pix_norm1_c;
04228
04229 c->fill_block_tab[0] = fill_block16_c;
04230 c->fill_block_tab[1] = fill_block8_c;
04231 c->scale_block = scale_block_c;
04232
04233
04234 c->pix_abs[0][0] = pix_abs16_c;
04235 c->pix_abs[0][1] = pix_abs16_x2_c;
04236 c->pix_abs[0][2] = pix_abs16_y2_c;
04237 c->pix_abs[0][3] = pix_abs16_xy2_c;
04238 c->pix_abs[1][0] = pix_abs8_c;
04239 c->pix_abs[1][1] = pix_abs8_x2_c;
04240 c->pix_abs[1][2] = pix_abs8_y2_c;
04241 c->pix_abs[1][3] = pix_abs8_xy2_c;
04242
04243 #define dspfunc(PFX, IDX, NUM) \
04244 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
04245 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
04246 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
04247 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
04248
04249 dspfunc(put, 0, 16);
04250 dspfunc(put_no_rnd, 0, 16);
04251 dspfunc(put, 1, 8);
04252 dspfunc(put_no_rnd, 1, 8);
04253 dspfunc(put, 2, 4);
04254 dspfunc(put, 3, 2);
04255
04256 dspfunc(avg, 0, 16);
04257 dspfunc(avg_no_rnd, 0, 16);
04258 dspfunc(avg, 1, 8);
04259 dspfunc(avg_no_rnd, 1, 8);
04260 dspfunc(avg, 2, 4);
04261 dspfunc(avg, 3, 2);
04262 #undef dspfunc
04263
04264 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
04265 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
04266
04267 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
04268 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
04269 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
04270 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
04271 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
04272 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
04273 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
04274 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
04275 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
04276
04277 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
04278 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
04279 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
04280 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
04281 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
04282 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
04283 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
04284 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
04285 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
04286
04287 #define dspfunc(PFX, IDX, NUM) \
04288 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
04289 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
04290 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
04291 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
04292 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
04293 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
04294 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
04295 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
04296 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
04297 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
04298 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
04299 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
04300 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
04301 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
04302 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
04303 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
04304
04305 dspfunc(put_qpel, 0, 16);
04306 dspfunc(put_no_rnd_qpel, 0, 16);
04307
04308 dspfunc(avg_qpel, 0, 16);
04309
04310
04311 dspfunc(put_qpel, 1, 8);
04312 dspfunc(put_no_rnd_qpel, 1, 8);
04313
04314 dspfunc(avg_qpel, 1, 8);
04315
04316
04317 dspfunc(put_h264_qpel, 0, 16);
04318 dspfunc(put_h264_qpel, 1, 8);
04319 dspfunc(put_h264_qpel, 2, 4);
04320 dspfunc(put_h264_qpel, 3, 2);
04321 dspfunc(avg_h264_qpel, 0, 16);
04322 dspfunc(avg_h264_qpel, 1, 8);
04323 dspfunc(avg_h264_qpel, 2, 4);
04324
04325 #undef dspfunc
04326 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
04327 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
04328 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
04329 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
04330 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
04331 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
04332 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
04333 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
04334
04335 c->draw_edges = draw_edges_c;
04336
04337 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
04338 ff_mlp_init(c, avctx);
04339 #endif
04340 #if CONFIG_VC1_DECODER
04341 ff_vc1dsp_init(c,avctx);
04342 #endif
04343 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
04344 ff_intrax8dsp_init(c,avctx);
04345 #endif
04346 #if CONFIG_RV30_DECODER
04347 ff_rv30dsp_init(c,avctx);
04348 #endif
04349 #if CONFIG_RV40_DECODER
04350 ff_rv40dsp_init(c,avctx);
04351 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
04352 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
04353 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
04354 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
04355 #endif
04356
04357 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
04358 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
04359 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
04360 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
04361 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
04362 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
04363 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
04364 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
04365
04366 #define SET_CMP_FUNC(name) \
04367 c->name[0]= name ## 16_c;\
04368 c->name[1]= name ## 8x8_c;
04369
04370 SET_CMP_FUNC(hadamard8_diff)
04371 c->hadamard8_diff[4]= hadamard8_intra16_c;
04372 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
04373 SET_CMP_FUNC(dct_sad)
04374 SET_CMP_FUNC(dct_max)
04375 #if CONFIG_GPL
04376 SET_CMP_FUNC(dct264_sad)
04377 #endif
04378 c->sad[0]= pix_abs16_c;
04379 c->sad[1]= pix_abs8_c;
04380 c->sse[0]= sse16_c;
04381 c->sse[1]= sse8_c;
04382 c->sse[2]= sse4_c;
04383 SET_CMP_FUNC(quant_psnr)
04384 SET_CMP_FUNC(rd)
04385 SET_CMP_FUNC(bit)
04386 c->vsad[0]= vsad16_c;
04387 c->vsad[4]= vsad_intra16_c;
04388 c->vsad[5]= vsad_intra8_c;
04389 c->vsse[0]= vsse16_c;
04390 c->vsse[4]= vsse_intra16_c;
04391 c->vsse[5]= vsse_intra8_c;
04392 c->nsse[0]= nsse16_c;
04393 c->nsse[1]= nsse8_c;
04394 #if CONFIG_DWT
04395 ff_dsputil_init_dwt(c);
04396 #endif
04397
04398 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
04399
04400 c->add_bytes= add_bytes_c;
04401 c->add_bytes_l2= add_bytes_l2_c;
04402 c->diff_bytes= diff_bytes_c;
04403 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
04404 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
04405 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
04406 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
04407 c->bswap_buf= bswap_buf;
04408 #if CONFIG_PNG_DECODER
04409 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
04410 #endif
04411
04412 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
04413 c->h263_h_loop_filter= h263_h_loop_filter_c;
04414 c->h263_v_loop_filter= h263_v_loop_filter_c;
04415 }
04416
04417 if (CONFIG_VP3_DECODER) {
04418 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
04419 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
04420 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
04421 }
04422
04423 c->h261_loop_filter= h261_loop_filter_c;
04424
04425 c->try_8x8basis= try_8x8basis_c;
04426 c->add_8x8basis= add_8x8basis_c;
04427
04428 #if CONFIG_VORBIS_DECODER
04429 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
04430 #endif
04431 #if CONFIG_AC3_DECODER
04432 c->ac3_downmix = ff_ac3_downmix_c;
04433 #endif
04434 #if CONFIG_LPC
04435 c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
04436 #endif
04437 c->vector_fmul = vector_fmul_c;
04438 c->vector_fmul_reverse = vector_fmul_reverse_c;
04439 c->vector_fmul_add = vector_fmul_add_c;
04440 c->vector_fmul_window = ff_vector_fmul_window_c;
04441 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
04442 c->vector_clipf = vector_clipf_c;
04443 c->float_to_int16 = ff_float_to_int16_c;
04444 c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
04445 c->scalarproduct_int16 = scalarproduct_int16_c;
04446 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
04447 c->scalarproduct_float = scalarproduct_float_c;
04448 c->butterflies_float = butterflies_float_c;
04449 c->vector_fmul_scalar = vector_fmul_scalar_c;
04450
04451 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
04452 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
04453
04454 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
04455 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
04456
04457 c->shrink[0]= av_image_copy_plane;
04458 c->shrink[1]= ff_shrink22;
04459 c->shrink[2]= ff_shrink44;
04460 c->shrink[3]= ff_shrink88;
04461
04462 c->prefetch= just_return;
04463
04464 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
04465 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
04466
04467 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
04468 if (ARCH_ARM) dsputil_init_arm (c, avctx);
04469 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
04470 if (HAVE_VIS) dsputil_init_vis (c, avctx);
04471 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
04472 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
04473 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
04474 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
04475 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
04476
04477 for(i=0; i<64; i++){
04478 if(!c->put_2tap_qpel_pixels_tab[0][i])
04479 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
04480 if(!c->avg_2tap_qpel_pixels_tab[0][i])
04481 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
04482 }
04483
04484 c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
04485 c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
04486 c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
04487 c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
04488
04489 c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
04490 c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
04491 c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
04492 c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
04493
04494 switch(c->idct_permutation_type){
04495 case FF_NO_IDCT_PERM:
04496 for(i=0; i<64; i++)
04497 c->idct_permutation[i]= i;
04498 break;
04499 case FF_LIBMPEG2_IDCT_PERM:
04500 for(i=0; i<64; i++)
04501 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
04502 break;
04503 case FF_SIMPLE_IDCT_PERM:
04504 for(i=0; i<64; i++)
04505 c->idct_permutation[i]= simple_mmx_permutation[i];
04506 break;
04507 case FF_TRANSPOSE_IDCT_PERM:
04508 for(i=0; i<64; i++)
04509 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
04510 break;
04511 case FF_PARTTRANS_IDCT_PERM:
04512 for(i=0; i<64; i++)
04513 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
04514 break;
04515 case FF_SSE2_IDCT_PERM:
04516 for(i=0; i<64; i++)
04517 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
04518 break;
04519 default:
04520 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
04521 }
04522 }
04523