///////////////////////////////////////////////////////////////////////////////////////////////
///		LoadTexture																			///
///////////////////////////////////////////////////////////////////////////////////////////////
// Loads the picture stored in `filename` and resizes it with StretchImage().
//
// The decoders are tried in a fixed order: JPG first, then PNG, then BMP.
// The first one that reports success wins and the remaining ones are not
// called at all.
//
// Returns the result of StretchImage() when a decoder succeeded, false when
// none of them did. `activated` is set to the decoding outcome before
// StretchImage() runs.
bool ZTexture::LoadTexture(char *filename)
{
	// Short-circuit evaluation keeps the JPG -> PNG -> BMP order and stops
	// at the first decoder that accepts the file.
	const bool decoded =	(LoadJPGTexture(filename)==true)
						||	(LoadPNGTexture(filename)==true)
						||	(LoadBMPTexture(filename)==true);

	activated = decoded;

	if(!decoded)
		return false;

	return StretchImage();
}



///////////////////////////////////////////////////////////////////////////////////////////////
///		StretchImage																		///
///////////////////////////////////////////////////////////////////////////////////////////////
// Resizes `Image` (4 bytes per pixel) to a supported texture size using
// nearest-neighbour sampling.
//
// Sizes left untouched:
//   - accelerated     : width and height each one of 32, 64, 128, 256, 512
//   - not accelerated : width and height each one of 32 ... 1024 (powers of
//                       two) AND equal, or one exactly twice the other
//
// Any other size is resized: the longer side is rounded up to the next power
// of two between 32 and 512 (anything above 512 becomes 512), and the shorter
// side gets the same value, or half of it when the longer side is more than
// twice the shorter one.
//
// Returns true when the image is usable afterwards. Returns false, and clears
// `activated`, when there is no image or the new buffer cannot be allocated.
// On a resize the old buffer is freed and Image/Width/Height are replaced.
bool ZTexture::StretchImage()
{
#ifdef	_SCOL_DEBUG_
	MMechostr(0,"ZTexture::StretchImage ");
#endif

	// No pixel data: nothing to resize.
	if(!Image)
	{
		activated = false;
		return false;
	}

	const int srcW = Width;
	const int srcH = Height;

	// ------ Is the current size already supported? ------
	const int	maxSide	= accelerated ? 512 : 1024;
	const bool	wFits	= (srcW >= 32) && (srcW <= maxSide) && ((srcW & (srcW-1)) == 0);
	const bool	hFits	= (srcH >= 32) && (srcH <= maxSide) && ((srcH & (srcH-1)) == 0);

	if(wFits && hFits)
	{
		// Accelerated mode accepts any combination of supported sides.
		if(accelerated)
			return true;

		// Otherwise the aspect ratio must be 1:1, 2:1 or 1:2.
		if( (srcW == srcH) || (srcW == srcH+srcH) || (srcH == srcW+srcW) )
			return true;
	}

	// ------ Pick the target size ------
	const bool	wider		= (srcW > srcH);
	const int	longSide	= wider ? srcW : srcH;
	const int	shortSide	= wider ? srcH : srcW;

	// Round the longer side up to a power of two, clamped to [32, 512].
	int side = 32;
	while( (side < 512) && (side < longSide) )
		side <<= 1;

	// Halve the other side when the picture is more than twice as long as
	// it is short.
	const int other = (longSide > shortSide+shortSide) ? (side>>1) : side;

	const int dstW = wider ? side  : other;
	const int dstH = wider ? other : side;

	// ------ Resample into a fresh buffer ------
	unsigned char *scaled = (unsigned char *) malloc(dstW*dstH*4);
	if(scaled==NULL)
	{
		activated = false;
		return false;
	}

	for(int row=0; row<dstH; ++row)
	{
		const int srcRowStart = (row*srcH/dstH) * srcW;	// first pixel of the sampled source row
		const int dstRowStart = row*dstW;					// first pixel of the destination row

		for(int col=0; col<dstW; ++col)
		{
			const int from	= (srcRowStart + col*srcW/dstW) * 4;
			const int to	= (dstRowStart + col) * 4;

			// Copy the four bytes of the pixel.
			for(int c=0; c<4; ++c)
				scaled[to+c] = Image[from+c];
		}
	}

	// ------ Swap the buffers ------
	free(Image);

	Image	= scaled;
	Width	= dstW;
	Height	= dstH;

	return true;
}



///////////////////////////////////////////////////////////////////////////////////////////////
///		StretchImageModif																	///
///////////////////////////////////////////////////////////////////////////////////////////////
//$BLG
//First, partial assembler translation: it copies a single 32-bit pixel and was called from the inner loop of StretchImageModif().
/*
void BLG_Copy32Bits(unsigned char *src, unsigned char *dst)
{
	_asm
	{
		//Saving registries
		push eax
		push ebx
		//Copying data
		mov eax, src
		mov eax, [eax]
		mov ebx, dst
		mov [ebx], eax
		//Restoring registries
		pop ebx
		pop eax
	}
}
*/
// Assembler translation of the C stretching loops (first, unoptimized version)
/*
void BLG_Stretch32BitsTexture(unsigned char *src, int i_i, int i_in, int i_x, int i_blg_ySrc, unsigned char *dst, int i_j, int i_jn, int i_y, int i_ybase, int i_yl, int i_blg_yDst)
{
	_asm
	{
		//Saving useful registries
		push eax
		push ebx
		push ecx
		push edx
		//Initializing Y loop
		mov i_y, 0
		NEWYL:
		  //i_ybase = i_y*i_in;
		  mov ebx, i_y
		  mov eax, i_in
		  mul ebx
		  mov i_ybase, eax
		  //i_yl = i_y*i_j/i_jn;
		  //i_yl *= i_i;
		  mov ebx, i_y   //This one can be deleted, i_y already i_in ebx
		  mov eax, i_j
		  mul ebx
		  mov ebx, i_jn
		  div ebx
		  mov ebx, i_i
		  mul ebx
		  mov i_yl, eax
		  //Initializing X loop
		  mov i_x, 0
		  NEWXL:
		    //i_blg_ySrc = (i_yl+i_x*i_i/i_in)*4;
		    mov ebx, i_x
		    mov eax, i_i
		    mul ebx
		    mov ebx, i_in
		    div ebx
		    add eax, i_yl
		    mov ebx, 4
		    mul ebx
		    mov i_blg_ySrc, eax
		    //i_blg_yDst = (i_ybase+i_x)*4;
		    mov ebx, i_ybase
		    add ebx, i_x
		    mov eax, 4
		    mul ebx
		    mov i_blg_yDst, eax
					//Copying data
					mov eax, src
					add eax, i_blg_ySrc
					mov eax, [eax]
					mov ebx, dst
					add ebx, i_blg_yDst
					mov [ebx], eax
				//Incrementing X counter and testing end of X loop
				inc i_x
				mov edx, i_in
				cmp edx, i_x
				jz  NXTYL
				jmp NEWXL
		  //Incrementing Y counter and testing end of Y loop
		  NXTYL:
		  inc i_y
		  mov edx, i_jn
		  cmp edx, i_y
		  jz  RSTOR
		  jmp NEWYL		
		//Restoring used registries
		RSTOR:
		pop edx
		pop ecx
		pop ebx
		pop eax
	}
}
*/
// Portable C++ implementation of the stretching loops.
// It replaces the former "Optimization 1" version, which was written in 32-bit
// MSVC inline assembler: that version could not be built for x64 or with other
// compilers, did 32-bit arithmetic on pointers, and its `loop` instructions
// would spin 2^32 times if a destination dimension was 0.
//
// Nearest-neighbour stretch of an image made of 32-bit (4 byte) pixels:
// destination pixel (x, y) receives source pixel (x*i_i/i_in, y*i_j/i_jn).
//
//   src         source pixels, i_i pixels per row
//   i_i, i_j    source width and height, in pixels
//   dst         destination buffer; must hold at least i_in*i_jn*4 bytes
//   i_in, i_jn  destination width and height, in pixels
//   i_x, i_y, i_ybase, i_yl, i_blg_ySrc, i_blg_yDst
//               scratch slots the assembler version used as loop variables;
//               they are kept so existing callers still compile, and their
//               incoming values are ignored
//
// Nothing is read or written when a pointer is null or when any of the four
// dimensions is not positive.
void BLG_Stretch32BitsTexture(unsigned char *src, int i_i, int i_in, int i_x, int i_blg_ySrc, unsigned char *dst, int i_j, int i_jn, int i_y, int i_ybase, int i_yl, int i_blg_yDst)
{
	// The scratch parameters are no longer needed; silence "unused" warnings.
	(void)i_x;
	(void)i_y;
	(void)i_ybase;
	(void)i_yl;
	(void)i_blg_ySrc;
	(void)i_blg_yDst;

	if(!src || !dst)										return;
	if(i_i <= 0 || i_j <= 0 || i_in <= 0 || i_jn <= 0)		return;

	for(int y=0; y<i_jn; y++)
	{
		// Same integer arithmetic as the reference C loops:
		//   ybase = y*in;   yl = y*j/jn;   yl *= i;
		const int ybase	= y*i_in;
		const int yl	= (y*i_j/i_jn) * i_i;

		for(int x=0; x<i_in; x++)
		{
			const unsigned char	*from	= src + (yl + x*i_i/i_in)*4;
			unsigned char		*to		= dst + (ybase + x)*4;

			// Byte-wise copy of one pixel: no alignment or aliasing assumptions.
			to[0] = from[0];
			to[1] = from[1];
			to[2] = from[2];
			to[3] = from[3];
		}
	}
}

// Resizes `ImageModif` (4 bytes per pixel) to a power-of-two texture size
// using BLG_Stretch32BitsTexture().
//
// Unlike StretchImage(), the image is always resampled: the "size is already
// fine" shortcut was removed by $BLG because, per the original note, the
// dimensions are checked before this function is entered (cf ZM3blitTexture()
// in ZooScol.cpp).
//
// Target size: the longer side is rounded up to the next power of two between
// 64 and 1024 (anything above 1024 becomes 1024); the shorter side gets the
// same value, or half of it when the longer side is more than twice the
// shorter one.
//   $BLG: 32 was dropped from the round-up steps (original note: "OpenGL 1.4
//         ensures only 64 and above") and 1024 was added because
//         ZM3blitTexture() accepts it.
//   $BLG: large dimensions have a huge framerate cost.
//
// Returns true on success. Returns false, and clears `activated`, when there
// is no image or the new buffer cannot be allocated. On success the old
// buffer is freed and ImageModif/Width/Height are replaced.
bool ZTexture::StretchImageModif()
{
#ifdef	_SCOL_DEBUG_
	MMechostr(0,"ZTexture::StretchImageModif ");
#endif

	// No pixel data: nothing to resize.
	if(!ImageModif)
	{
		activated = false;
		return false;
	}

	const int srcW = Width;
	const int srcH = Height;

	// ------ Pick the target size ------
	const bool	wider		= (srcW > srcH);
	const int	longSide	= wider ? srcW : srcH;
	const int	shortSide	= wider ? srcH : srcW;

	// Round the longer side up to a power of two, clamped to [64, 1024].
	int side = 64;
	while( (side < 1024) && (side < longSide) )
		side <<= 1;

	// Halve the other side when the picture is more than twice as long as
	// it is short.
	const int other = (longSide > shortSide+shortSide) ? (side>>1) : side;

	const int dstW = wider ? side  : other;
	const int dstH = wider ? other : side;

	// ------ Resample into a fresh buffer ------
	unsigned char *scaled = (unsigned char *) malloc(dstW*dstH*4);
	if(scaled==NULL)
	{
		activated = false;
		return false;
	}

	// $BLG - v4.6a4: the per-pixel C loops were replaced by this helper.
	// gluScaleImage() had been tried instead and was dropped: the original
	// note reports a 15x performance loss.
	// The six zero arguments are scratch slots that the helper takes by value.
	BLG_Stretch32BitsTexture(ImageModif, srcW, dstW, 0, 0, scaled, srcH, dstH, 0, 0, 0, 0);

	// ------ Swap the buffers ------
	free(ImageModif);

	ImageModif	= scaled;
	Width		= dstW;
	Height		= dstH;

	return true;
}