#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include "global.h"
#include "math.h"
#include "cpu_accel.h"
#include "fastintfns.h"
#include "motionsearch.h"

/* Macro-block Motion estimation results record */

typedef struct _blockcrd
{
	int16_t x;
	int16_t y;
} blockxy;

struct mb_motion
{
	blockxy pos;        // Half-pel co-ordinates of source block
	int sad;			// Sum of absolute difference
	int var;
	uint8_t *blk ;		// Source block data (in luminace data array)
	int hx, hy;			// Half-pel offsets
	int fieldsel;		// 0 = top 1 = bottom
	int fieldoff;       // Offset from start of frame data to first line
						// of field.	(top = 0, bottom = width );
};
typedef struct mb_motion mb_motion_s;

struct subsampled_mb
{
	uint8_t *mb;		// One pel
	uint8_t *fmb;		// Two-pel subsampled
	uint8_t *qmb;		// Four-pel subsampled
	uint8_t *umb; 		// U compoenent one-pel
	uint8_t *vmb;       // V component  one-pel
};
typedef struct subsampled_mb subsampled_mb_s;


static void frame_ME (pict_data_s *picture, int mboffset, int i, int j, struct mbinfo *mbi);

static void mb_me_search (
	uint8_t *org, uint8_t *ref,
    int fieldoff,
	subsampled_mb_s *ssblk,
	int lx, int i0, int j0, 
	int sx, int sy, int h, 
	int xmax, int ymax,
	mb_motion_s *motion );


void init_motion(void)
{
	init_motion_search();
}

void motion_subsampled_lum( pict_data_s *picture )
{
	(*psubsample_image)( picture->curorg[0], 
					 opt_phy_width,
					 picture->curorg[0]+fsubsample_offset, 
					 picture->curorg[0]+qsubsample_offset );
}


void motion_estimation
	( pict_data_s *picture )
{
	mbinfo_s *mbi = picture->mbinfo;
	int i, j;
	int mb_row_incr;			/* Offset increment to go down 1 row of mb's */
	int mb_row_start;
	mb_row_start = 0;
		
	mb_row_incr = 16*opt_phy_width;
	
	/* loop through all macroblocks of a frame picture */
	for (j=0; j<opt_enc_height; j+=16)
	{
		for (i=0; i<opt_enc_width; i+=16)
		{
			frame_ME(picture,  mb_row_start, i, j, mbi);
			mbi++;
		}
		mb_row_start += mb_row_incr;
	}

}


static __inline__ int unidir_pred_var( const mb_motion_s *motion,
							uint8_t *mb,  
							int lx, 
							int h)
{
	return (*psumsq)(motion->blk, mb, lx, motion->hx, motion->hy, h);
}


static __inline__ int bidir_pred_var( const mb_motion_s *motion_f, 
									  const mb_motion_s *motion_b,
									  uint8_t *mb,  
									  int lx, int h)
{
	return (*pbsumsq)( motion_f->blk, motion_b->blk,
					   mb, lx, 
					   motion_f->hx, motion_f->hy,
					   motion_b->hx, motion_b->hy,
					   h);
}

static int unidir_var_sum( mb_motion_s *lum_mc, 
						   uint8_t **ref, 
						   subsampled_mb_s *ssblk,
						   int lx, int h )
{
	int uvlx = (lx>>1);
	int uvh = (h>>1);
	/* N.b. MC co-ordinates are computed in half-pel units! */
	int cblkoffset = (lum_mc->fieldoff>>1) +
		(lum_mc->pos.x>>2) + (lum_mc->pos.y>>2)*uvlx;
	
	return 	lum_mc->var +
		((*psumsq_sub22)( ref[1] + cblkoffset, ssblk->umb, uvlx, uvh) +
		 (*psumsq_sub22)( ref[2] + cblkoffset, ssblk->vmb, uvlx, uvh));
}

static int bidir_var_sum( mb_motion_s *lum_mc_f, 
						  mb_motion_s *lum_mc_b, 
						  uint8_t **ref_f, 
						  uint8_t **ref_b,
						  subsampled_mb_s *ssblk,
						  int lx, int h )
{
	int uvlx = (lx>>1);
	int uvh = (h>>1);
	/* N.b. MC co-ordinates are computed in half-pel units! */
	int cblkoffset_f = (lum_mc_f->fieldoff>>1) + 
		(lum_mc_f->pos.x>>2) + (lum_mc_f->pos.y>>2)*uvlx;
	int cblkoffset_b = (lum_mc_b->fieldoff>>1) + 
		(lum_mc_b->pos.x>>2) + (lum_mc_b->pos.y>>2)*uvlx;
	
	return 	(
		(*pbsumsq)( lum_mc_f->blk, lum_mc_b->blk,
					ssblk->mb, lx, 
					lum_mc_f->hx, lum_mc_f->hy,
					lum_mc_b->hx, lum_mc_b->hy,
					h) +
		(*pbsumsq_sub22)( ref_f[1] + cblkoffset_f, ref_b[1] + cblkoffset_b,
					   ssblk->umb, uvlx, uvh ) +
		(*pbsumsq_sub22)( ref_f[2] + cblkoffset_f, ref_b[2] + cblkoffset_b,
					   ssblk->vmb, uvlx, uvh ));

}

/*
 * Sum of chrominance variance of a block.
 */
static __inline__ int chrom_var_sum( subsampled_mb_s *ssblk, int h, int lx )
{
	return ((*pvariance)(ssblk->umb,(h>>1),(lx>>1)) + 
			(*pvariance)(ssblk->vmb,(h>>1),(lx>>1))) * 2;
}


static __inline__ int bidir_pred_sad( const mb_motion_s *motion_f, 
									  const mb_motion_s *motion_b,
									  uint8_t *mb,  
									  int lx, int h)
{
	return (*pbsad)(motion_f->blk, motion_b->blk, 
					 mb, lx, 
					 motion_f->hx, motion_f->hy,
					 motion_b->hx, motion_b->hy,
					 h);
}

static void frame_ME(pict_data_s *picture,
					 int mb_row_start,
					 int i, int j, 
					 mbinfo_s *mbi)
{
	uint8_t **oldrefimg, **newrefimg;

	mb_motion_s framef_mc;
	mb_motion_s frameb_mc;
	mb_motion_s zeromot_mc;

	int var,v0;
	int vmc,vmcf,vmcr,vmci;
	subsampled_mb_s ssmb;

	
	/* Select between the original or the reconstructed image for
	   final refinement of motion estimation */
	if( ctl_refine_from_rec )
	{
		oldrefimg = picture->oldref;
		newrefimg = picture->newref;
	}
	else
	{
		oldrefimg = picture->oldorg;
		newrefimg = picture->neworg;
	}

	ssmb.mb = picture->curorg[0] + mb_row_start + i;
	ssmb.umb = (uint8_t*)(picture->curorg[1] + (i>>1) + (mb_row_start>>2));
	ssmb.vmb = (uint8_t*)(picture->curorg[2] + (i>>1) + (mb_row_start>>2));
	ssmb.fmb = (uint8_t*)(picture->curorg[0] + fsubsample_offset + ((i>>1) + (mb_row_start>>2)));
	ssmb.qmb = (uint8_t*)(picture->curorg[0] + qsubsample_offset + (i>>2) + (mb_row_start>>4));


	zeromot_mc.pos.x = (i<<1);	/* Damn but its messy carrying doubled */
	zeromot_mc.pos.y = (j<<1);	/* absolute Co-ordinates for M/C */
	zeromot_mc.fieldsel = 0;
	zeromot_mc.fieldoff = 0;
	zeromot_mc.blk = oldrefimg[0]+mb_row_start+i;

	var = (*pvariance)(ssmb.mb,16,opt_phy_width);

	if (picture->pict_type==I_TYPE)
	{
		mbi->mb_type = MB_INTRA;
	}
	else if (picture->pict_type==P_TYPE)
	{
		var += chrom_var_sum(&ssmb,16,opt_phy_width);


		mb_me_search(picture->oldorg[0],oldrefimg[0], 0,
					&ssmb, opt_phy_width, i,j,picture->sxf,picture->syf,16,
					opt_enc_width,opt_enc_height, &framef_mc);

		framef_mc.fieldoff = 0;
		vmc = vmcf = unidir_var_sum( &framef_mc, oldrefimg, &ssmb,  opt_phy_width, 16 );
		mbi->motion_type = MC_FRAME;

		if (vmc>var*3/2 )
		{
			mbi->mb_type = MB_INTRA;
			mbi->var = var;
		}
		else
		{
			zeromot_mc.var = unidir_pred_var(&zeromot_mc, ssmb.mb, opt_phy_width, 16 );
			v0 = unidir_var_sum(&zeromot_mc, oldrefimg, &ssmb, opt_phy_width, 16 );

			if (4*v0>5*vmc )
			{
				/* use MC */
				var = vmc;
				mbi->mb_type = MB_FORWARD;
				mbi->MV[0][0][0] = framef_mc.pos.x - (i<<1);
				mbi->MV[0][0][1] = framef_mc.pos.y - (j<<1);
			}
			else
			{
				/* No-MC */
				var = v0;
				mbi->mb_type = 0;
				mbi->motion_type = MC_FRAME;
				mbi->MV[0][0][0] = 0;
				mbi->MV[0][0][1] = 0;
			}
		}
	}
	else /* if (pict_type==B_TYPE) */
	{
		var = (*pvariance)(ssmb.mb,16,opt_phy_width) + chrom_var_sum(&ssmb,16,opt_phy_width);
		/* forward */
		mb_me_search(picture->oldorg[0],oldrefimg[0],0,&ssmb,
						opt_phy_width,i,j,picture->sxf,picture->syf,
						16,opt_enc_width,opt_enc_height, &framef_mc);
		framef_mc.fieldoff = 0;
		vmcf = unidir_var_sum( &framef_mc, oldrefimg, &ssmb, opt_phy_width, 16 );

		/* backward */
		mb_me_search(picture->neworg[0],newrefimg[0],0,&ssmb, 
						opt_phy_width, i,j,picture->sxb,picture->syb,
						16, opt_enc_width, opt_enc_height, &frameb_mc);
		frameb_mc.fieldoff = 0;
		vmcr = unidir_var_sum( &frameb_mc, newrefimg, &ssmb, opt_phy_width, 16 );

		/* interpolated (bidirectional) */
		vmci = bidir_var_sum( &framef_mc, &frameb_mc, oldrefimg, newrefimg, &ssmb, opt_phy_width, 16 );

		/* decisions */

		/*	select between forward/backward/interpolated prediction:
			use the one with smallest mean sqaured prediction error */
		if (vmcf<=vmcr && vmcf<=vmci)
		{
			vmc = vmcf;
			mbi->mb_type = MB_FORWARD;
		}
		else if (vmcr<=vmci)
		{
			vmc = vmcr;
			mbi->mb_type = MB_BACKWARD;
		}
		else
		{
			vmc = vmci;
			mbi->mb_type = MB_FORWARD | MB_BACKWARD;
		}

		mbi->motion_type = MC_FRAME;
		
		if (vmc>var*3/2) mbi->mb_type = MB_INTRA;
		else
		{
			mbi->var = vmc;
			/* forward */
			mbi->MV[0][0][0] = framef_mc.pos.x - (i<<1);
			mbi->MV[0][0][1] = framef_mc.pos.y - (j<<1);
			/* backward */
			mbi->MV[0][1][0] = frameb_mc.pos.x - (i<<1);
			mbi->MV[0][1][1] = frameb_mc.pos.y - (j<<1);
		}
	}
}







static void mb_me_search(
	uint8_t *org,
	uint8_t *ref,
    int fieldoff,
	subsampled_mb_s *ssblk,
	int lx, int i0, int j0, 
	int sx, int sy, int h,
	int xmax, int ymax,
	mb_motion_s *res
	)
{
	me_result_s best;
	int i,j,ilow,ihigh,jlow,jhigh;
	int x,y;
	int d;

	uint8_t *s22org = (uint8_t*)(org+fsubsample_offset+(fieldoff>>1));
	uint8_t *s44org = (uint8_t*)(org+qsubsample_offset+(fieldoff>>2));
	uint8_t *orgblk;

    uint8_t *reffld = ref+fieldoff;

	int flx = lx >> 1;
	int qlx = lx >> 2;
	int fh = h >> 1;
	int qh = h >> 2;

	me_result_set sub44set;
	me_result_set sub22set;

	/* xmax and ymax into more useful form... */
	xmax -= 16;
	ymax -= h;
  

	jlow = j0-sy;
	jlow = jlow < 0 ? 0 : jlow;
	jhigh =  j0+(sy-1);
	jhigh = jhigh > ymax ? ymax : jhigh;
	ilow = i0-sx;
	ilow = ilow < 0 ? 0 : ilow;
	ihigh =  i0+(sx-1);
	ihigh = ihigh > xmax ? xmax : ihigh;

	best.weight = (*psad_00)(reffld+i0+j0*lx,ssblk->mb,lx,h,INT_MAX);
	best.x = 0;
	best.y = 0;

	(*pbuild_sub44_mests)( &sub44set,
							ilow, jlow, ihigh, jhigh,
							i0, j0,
							best.weight,
							s44org, 
							ssblk->qmb, qlx, qh,
							ctl_44_red); 

	
	(*pbuild_sub22_mests)( &sub44set, &sub22set,
							i0, j0, 
							ihigh,  jhigh, 
							best.weight,
							s22org, ssblk->fmb, flx, fh,
							ctl_22_red);


	(*pfind_best_one_pel)( &sub22set,
						   reffld, ssblk->mb, 
						   i0, j0,
						   ihigh, jhigh, 
						   lx, h, &best );

	res->sad = INT_MAX;
	x = (i0+best.x)<<1;
	y = (j0+best.y)<<1;


	ilow = x - (x>(ilow<<1));
	ihigh = x + (x<((ihigh)<<1));
	jlow = y - (y>(jlow<<1));
	jhigh =  y+ (y<((jhigh)<<1));

	for (j=jlow; j<=jhigh; j++)
	{
		for (i=ilow; i<=ihigh; i++)
		{
			orgblk = reffld+(i>>1)+((j>>1)*lx);
			if( i&1 )
			{
				if( j & 1 )
					d = (*psad_11)(orgblk,ssblk->mb,lx,h);
				else
					d = (*psad_01)(orgblk,ssblk->mb,lx,h);
			}
			else
			{
				if( j & 1 )
					d = (*psad_10)(orgblk,ssblk->mb,lx,h);
				else
					d = (*psad_00)(orgblk,ssblk->mb,lx,h,res->sad);
			}
			d += (intabs(i-(i0<<1)) + intabs(j-(j0<<1)))<<3;
			if (d<res->sad)
			{
				res->sad = d;
				res->pos.x = i;
				res->pos.y = j;
				res->blk = orgblk;
				res->hx = i&1;
				res->hy = j&1;
			}
		}
	}
	res->var = (*psumsq)(res->blk, ssblk->mb, lx, res->hx, res->hy, h);

}


