So I'm wondering what I can do to optimize it further? I've unrolled all the loops, so it's pure math and variable lookups.
I've been advised to use the VFPU, but the guy I was speaking with only had experience with the official SDK and wasn't sure whether pspdev has that ability. Does it?
If so, could you please provide me with a short example, even if it's just adding one integer to another on the VFPU, so I can get started with it?
Thanks.
Code: Select all
// Combines the 4x4 `grid` referenced here (presumably this matrix's own
// member -- the enclosing class is not visible) with `mat->grid` and stores
// the outcome back into `grid`:
//
//     grid[i][j] = sum over k in 0..3 of  grid[k][j] * mat->grid[i][k]
//
// mat: matrix whose `grid` supplies the second factor of every term; it is
//      only read, never written, and is dereferenced without a null check.
//
// The whole product is first assembled in a scratch Matrix, because every
// output element needs values of `grid` that would otherwise already have
// been overwritten. For the same reason the result is still well defined
// when `mat` refers to the very matrix being updated.
void Multiply(Matrix *mat)
{
    Matrix product;

    // Pass 1: evaluate all sixteen elements into the scratch matrix.
    for (int i = 0; i < 4; ++i)
    {
        for (int j = 0; j < 4; ++j)
        {
            // The four terms stay spelled out and are added left to right,
            // so the arithmetic is carried out in a fixed, explicit order.
            product.grid[i][j] = (grid[0][j] * mat->grid[i][0])
                               + (grid[1][j] * mat->grid[i][1])
                               + (grid[2][j] * mat->grid[i][2])
                               + (grid[3][j] * mat->grid[i][3]);
        }
    }

    // Pass 2: copy the finished product back, element by element.
    for (int i = 0; i < 4; ++i)
    {
        for (int j = 0; j < 4; ++j)
        {
            grid[i][j] = product.grid[i][j];
        }
    }
}