I've been doing basic VFPU stuff for my renderer, but now I need some additional complexity, i.e. conditions.
Here's my visibility check function, but I'm not sure whether the condition check for the division by zero (the separated part where I break the assembly and insert C++ in between) could be done more efficiently or more nicely :)
...of course other optimization ideas are welcome as well.
Code: Select all
...
// Visibility check: builds the triangle's normal from its three vertices,
// normalizes it, and writes its dot product with iViewVector to *aReturnValue.
// A zero-length normal writes 0 instead, avoiding the reciprocal square root
// of zero.

// Receives the SQUARED length of the normal (x*x + y*y + z*z) from the VFPU;
// no square root has been taken when the CPU tests it below.
float length(0.0f);
__asm__ volatile (
    // Load the three vertices and the view vector as quads.
    // NOTE(review): ulv.q reads a whole quad from each pointer although only
    // the first three components are used afterwards - confirm every source
    // has a fourth readable float behind it.
    "ulv.q R100, 0x0(%1)\n"      // vert1
    "ulv.q R101, 0x0(%2)\n"      // vert2
    "ulv.q R102, 0x0(%3)\n"      // vert3
    "ulv.q R103, 0x0(%4)\n"      // iViewVector
    // Edge vectors: vert1 - vert2 and vert2 - vert3.
    "vsub.t R100, R100, R101\n"
    "vsub.t R101, R101, R102\n"
    // Cross product of the two edges gives the (unnormalized) normal in R200.
    "vcrsp.t R200, R100, R101\n"
    // Squared length: component-wise square into R100, then sum the three
    // components into S100.
    "vmul.t R100, R200, R200\n"
    "vadd.s S100, S100, S110\n"
    "vadd.s S100, S100, S120\n"
    // Hand the squared length to the CPU so it can branch on it.
    "mfv %0, S100\n"
    : "=r" (length)
    : "r" (aVertex1), "r" (aVertex2), "r" (aVertex3), "r" (&iViewVector)
    // The loads go through raw pointers that are not declared as memory
    // operands; the clobber makes the compiler complete earlier stores to
    // that data before the asm runs.
    : "memory"
);

// Guard against a zero-length normal before taking the reciprocal square root.
if (0.0f != length)
{
    // This statement relies on S100, R200 and R103 still holding the values
    // left by the asm statement above; the code in between must not touch
    // the VFPU registers.
    __asm__ volatile (
        // 1 / sqrt(squared length)
        "vrsq.s S100, S100\n"
        // Normalize the normal.
        "vscl.t R200, R200, S100\n"
        // Dot product of the unit normal with the view vector (the cosine of
        // the angle between them if iViewVector is unit length).
        "vdot.t S200, R200, R103\n"
        // Store the result for the caller.
        "usv.s S200, 0x0(%0)\n"
        :
        : "r" (aReturnValue)
        // The store goes through a raw pointer; the clobber stops the
        // compiler from caching or reordering accesses to *aReturnValue.
        : "memory"
    );
}
else
{
    *aReturnValue = 0.0f;
}
...