//---------------------------------------------------------------------------
//These materials/shaders are part of the NEW InstanceManager implementation
//Written by Matias N. Goldberg ("dark_sylinc")
//---------------------------------------------------------------------------

#include "HLSL_SM4Support.hlsl"

//---------------------------------------------
//Vertex Shader Input
//---------------------------------------------
struct VS_INPUT
{
	//Vertex position and normal; main_vs transforms both by the transform
	//it fetches from matrixTexture before anything else is applied.
	float4 Position	:	POSITION;
	float3 Normal	:	NORMAL;
#ifdef BONE_TWO_WEIGHTS
	//Blend weights. main_vs only reads them when ST_DUAL_QUATERNION is also
	//defined: .x scales the first dual quaternion and .y the second.
	//.z and .w are not read by main_vs.
	float4 weights		: 	BLENDWEIGHT;
#endif
	//Texture coordinates, forwarded unchanged to the pixel shader stage
	//(skipped when DEPTH_SHADOWCASTER is defined).
	float2 uv0		:	TEXCOORD0;
	//Lookup coordinates into matrixTexture (mOffset is added to each of them).
	// * Matrix path (ST_DUAL_QUATERNION undefined): .x/.y/.z are the U coordinates
	//   of the three matrix rows and .w is the V coordinate shared by all of them;
	//   the original note says .w is always 0 in this case.
	// * Dual quaternion path: .x/.y are the U coordinates of the two rows of the
	//   first dual quaternion (V is 0). With BONE_TWO_WEIGHTS, .z/.w are read as
	//   the U coordinates of the second dual quaternion, so .w is NOT expected to
	//   be 0 there.
	float4 m03		:	TEXCOORD1; //m03.w is 0 only in the matrix path, see above

	//Offset added to every matrixTexture lookup coordinate built from m03.
	float2 mOffset		:	TEXCOORD2;
	
#ifdef BONE_MATRIX_LUT
	//Rows of an extra 3x4 matrix that main_vs applies to the position and
	//normal after the transform fetched from matrixTexture.
	float4 worldMatrix0	:	TEXCOORD3;
	float4 worldMatrix1	:	TEXCOORD4;
	float4 worldMatrix2	:	TEXCOORD5;
#endif
};

#include "InstancingVertexInterpolators.cg"
#ifdef ST_DUAL_QUATERNION
#include "DualQuaternionSkinning_Shadow.cg"
#endif

//Texture that main_vs samples with tex2Dlod to fetch the per-vertex transform
//(matrix rows or dual quaternion rows).
//NOTE(review): SAMPLER2D is not defined in this file (presumably it comes from
//HLSL_SM4Support.hlsl); its second argument looks like the texture unit index,
//i.e. unit 0 for the shadow caster pass and unit 2 otherwise - confirm against
//the macro definition and the material scripts.
#ifdef DEPTH_SHADOWCASTER
	uniform SAMPLER2D(matrixTexture, 0);
#else
	uniform SAMPLER2D(matrixTexture, 2);
#endif

//---------------------------------------------
//Main Vertex Shader
//---------------------------------------------
//Vertex shader entry point.
//
//Fetches this vertex's transform from matrixTexture (a 3x4 matrix, or one/two
//dual quaternions when ST_DUAL_QUATERNION is defined), applies it to the input
//position and normal, optionally applies the extra per-vertex 3x4 matrix
//(BONE_MATRIX_LUT) and finally projects the result with viewProjMatrix.
//
//Parameters:
//	input				Per-vertex attributes, see VS_INPUT.
//	viewProjMatrix		Multiplied with the transformed position to produce the
//						output position.
//	depthRange			Only declared for the shadow caster/receiver variants;
//						it is not read anywhere in this function.
//	texViewProjMatrix	Only declared for DEPTH_SHADOWRECEIVER; multiplied with the
//						transformed position to produce lightSpacePos.
//
//Returns the filled VS_OUTPUT. When DEPTH_SHADOWCASTER is defined only
//ps.Position is written here.
VS_OUTPUT main_vs( in VS_INPUT input,
				   uniform float4x4 viewProjMatrix

#if defined( DEPTH_SHADOWCASTER ) || defined( DEPTH_SHADOWRECEIVER )
				,  uniform float4 depthRange
#endif
#ifdef DEPTH_SHADOWRECEIVER
				,  uniform float4x4 texViewProjMatrix
#endif
				)
{
	VS_OUTPUT vsOut;
	float4 posWS;
	float3 normalWS;

#if defined( ST_DUAL_QUATERNION )
	//Lookup coordinates of the two rows of the first dual quaternion.
	//m03.x / m03.y provide U, V starts at 0; mOffset is added to both.
	float2 uvRow0 = float2(input.m03.x, 0.0) + input.mOffset;
	float2 uvRow1 = float2(input.m03.y, 0.0) + input.mOffset;

	float2x4 boneDQ;
	boneDQ[0] = tex2Dlod( matrixTexture, float4(uvRow0, 0, 0) );
	boneDQ[1] = tex2Dlod( matrixTexture, float4(uvRow1, 0, 0) );

	#if defined( BONE_TWO_WEIGHTS )
	//The second dual quaternion is addressed through m03.z and m03.w
	float2 uvSecondRow0 = float2(input.m03.z, 0.0) + input.mOffset;
	float2 uvSecondRow1 = float2(input.m03.w, 0.0) + input.mOffset;

	float2x4 secondDQ;
	secondDQ[0] = tex2Dlod( matrixTexture, float4(uvSecondRow0, 0, 0) );
	secondDQ[1] = tex2Dlod( matrixTexture, float4(uvSecondRow1, 0, 0) );

	//Antipodality handling: flip the second dual quaternion when its first row
	//points away from the first one's. Can be dropped for speed at the cost of
	//accuracy.
	if( dot(boneDQ[0], secondDQ[0]) < 0.0 )
		secondDQ = -1.0 * secondDQ;

	//Weighted sum of both dual quaternions...
	boneDQ = boneDQ * input.weights.x + input.weights.y * secondDQ;
	//...renormalized by the length of its first row
	boneDQ = boneDQ / length(boneDQ[0]);
	#endif

	posWS = float4(calculateBlendPosition(input.Position, boneDQ), 1.0);
	normalWS = calculateBlendNormal(input.Normal, boneDQ);
#else
	//! [world_pos]
	float3x4 boneMatrix;
	boneMatrix[0] = tex2Dlod( matrixTexture, float4(input.mOffset + input.m03.xw, 0, 0) );
	boneMatrix[1] = tex2Dlod( matrixTexture, float4(input.mOffset + input.m03.yw, 0, 0) );
	boneMatrix[2] = tex2Dlod( matrixTexture, float4(input.mOffset + input.m03.zw, 0, 0) );

	posWS = float4( mul( boneMatrix, input.Position ).xyz, 1.0f );
	//! [world_pos]
	normalWS = mul( (float3x3)(boneMatrix), input.Normal );
#endif

#if defined( BONE_MATRIX_LUT )
	//Extra 3x4 matrix supplied through the vertex attributes, applied on top of
	//the transform fetched above.
	float3x4 extraMatrix;
	extraMatrix[0] = input.worldMatrix0;
	extraMatrix[1] = input.worldMatrix1;
	extraMatrix[2] = input.worldMatrix2;

	normalWS = mul( (float3x3)(extraMatrix), normalWS );
	posWS = float4( mul( extraMatrix, posWS ).xyz, 1.0f );
#endif

	//Project the transformed position
	vsOut.ps.Position = mul( viewProjMatrix, posWS );

#if !defined( DEPTH_SHADOWCASTER )
	//Forward the texture coordinates, plus the transformed position and the
	//renormalized normal for Blinn Phong lighting
	vsOut.ps.uv0 = input.uv0;
	vsOut.ps.vPos = posWS.xyz;
	vsOut.ps.Normal = normalize(normalWS);

	#if defined( DEPTH_SHADOWRECEIVER )
		//Position of the vertex in light space, used for shadowing
		vsOut.ps.lightSpacePos = mul( texViewProjMatrix, posWS );
	#endif
#endif

	return vsOut;
}