VFPU playground, code generation for gas-unsupported opcodes

Discuss the development of new homebrew software, tools and libraries.

Moderators: cheriff, TyRaNiD

holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

VFPU playground, code generation for gas-unsupported opcodes

Post by holger »

Hi,

this codelet allows you to emit asm instructions at compile- or runtime, even if the GNU assembler does not yet support them. Thus we can easily play with the VFPU instruction set. The example below demonstrates this by initializing the VFPU vector register set to zero and then loading an identity matrix in a single instruction.

This raw example only contains some of the most basic instructions and register names (only GPRs, vector and matrix Quadword addressing). Nevertheless you should get the idea how to add new opcodes and play with them.

All opcode and instruction definitions live in codegen.h, which is also the place to document them. To try it out, create a directory pspgl/test-vfpu/ and copy the following files into this folder:

Makefile:

Code: Select all

# Builds the VFPU test program with the psp- cross toolchain and packages it
# with psp-install.
ARCH := psp-
CC := $(ARCH)gcc
PSP_INSTALL := ../tools/psp-install
RM := rm -f

# Ask psp-config for the SDK location exactly once; PSPSDK is kept as an
# alias so anything still referring to the old second name keeps working.
PSPPATH := $(shell psp-config --pspsdk-path)
PSPSDK := $(PSPPATH)

LIBS := -lpspdebug -lpspdisplay -lpspge -lpspsdk -lpspctrl -lm -lc -lpspuser -lpspkernel
# -MD makes the compiler write a .d dependency file next to every object.
CFLAGS := -g -Wall -O2 -MD -I$(PSPPATH)/include
LFLAGS := -g -Wall -O2 -L$(PSPPATH)/lib $(LIBS)

# No trailing whitespace here: it would become part of the value.
TARGET := test-vfpu
OBJS := main.o

# Timestamp shown in the EBOOT title; evaluated once per make invocation.
BUILDDATE := $(shell date "+%Y/%m/%d %k:%M:%S")

.PHONY: all install clean

all: $(TARGET)

%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

$(TARGET): $(OBJS)
	$(CC) $(OBJS) $(LFLAGS) -o $@

install: all
	$(PSP_INSTALL) $(TARGET) --eboot-title="$(TARGET) $(BUILDDATE)"

clean:
	$(RM) $(TARGET) *.d *.o *.a *.elf *.sfo EBOOT.PBP

# Header dependencies generated by -MD; the files do not exist before the
# first build, hence the leading '-'.
-include $(OBJS:.o=.d)


codegen.h:

Code: Select all

#ifndef __codegen_h__
#define __codegen_h__


/*
 * GPR register set: symbolic name -> register number (0...31).
 *
 * These have to stay plain object-like macros with literal values: the
 * instruction macros are turned into text by cgen_asm() and handed to the
 * assembler, so every name must expand to a number.
 */

#define  R_zero 0
#define  R_at   1
#define  R_v0   2
#define  R_v1   3
#define  R_a0   4
#define  R_a1   5
#define  R_a2   6
#define  R_a3   7

#define  R_a4   8
#define  R_a5   9
#define  R_a6   10
#define  R_a7   11
#define  R_v6   10	/* historical misspelling of R_a6, kept for existing users */
#define  R_v7   11	/* historical misspelling of R_a7, kept for existing users */
#define  R_t0   12
#define  R_t1   13
#define  R_t2   14
#define  R_t3   15

#define  R_s0   16
#define  R_s1   17
#define  R_s2   18
#define  R_s3   19
#define  R_s4   20
#define  R_s5   21
#define  R_s6   22
#define  R_s7   23

#define  R_t8   24
#define  R_t9   25
#define  R_k0   26
#define  R_k1   27
#define  R_gp   28
#define  R_sp   29
#define  R_s8   30
#define  R_ra   31


/* VFPU registers, Quadword addressing */
/*
 * Register numbers for quadword (4-component) vector access.
 * Q_C* names occupy 0..31 (four per matrix, matrices 0..7) and Q_R* names
 * occupy 32..63; each Q_R value is the matching Q_C value plus 32.
 * Keep these as plain numeric #defines: cgen_asm() stringifies the fully
 * expanded instruction macro, so each name has to expand to a number.
 */
#define  Q_C000  0	/* First digit specifies matrix, second the row */
#define  Q_C010  1
#define  Q_C020  2
#define  Q_C030  3
#define  Q_C100  4
#define  Q_C110  5
#define  Q_C120  6
#define  Q_C130  7

#define  Q_C200  8
#define  Q_C210  9
#define  Q_C220  10
#define  Q_C230  11
#define  Q_C300  12
#define  Q_C310  13
#define  Q_C320  14
#define  Q_C330  15

#define  Q_C400  16
#define  Q_C410  17
#define  Q_C420  18
#define  Q_C430  19
#define  Q_C500  20
#define  Q_C510  21
#define  Q_C520  22
#define  Q_C530  23

#define  Q_C600  24
#define  Q_C610  25
#define  Q_C620  26
#define  Q_C630  27
#define  Q_C700  28
#define  Q_C710  29
#define  Q_C720  30
#define  Q_C730  31

#define  Q_R000  32	/* First Digit specifies matrix, third the column */
#define  Q_R001  33
#define  Q_R002  34
#define  Q_R003  35
#define  Q_R100  36
#define  Q_R101  37
#define  Q_R102  38
#define  Q_R103  39

#define  Q_R200  40
#define  Q_R201  41
#define  Q_R202  42
#define  Q_R203  43
#define  Q_R300  44
#define  Q_R301  45
#define  Q_R302  46
#define  Q_R303  47

#define  Q_R400  48
#define  Q_R401  49
#define  Q_R402  50
#define  Q_R403  51
#define  Q_R500  52
#define  Q_R501  53
#define  Q_R502  54
#define  Q_R503  55

#define  Q_R600  56
#define  Q_R601  57
#define  Q_R602  58
#define  Q_R603  59
#define  Q_R700  60
#define  Q_R701  61
#define  Q_R702  62
#define  Q_R703  63


/* VFPU registers, 4x4 Matrix (Quad) addressing */
/*
 * Register numbers for whole-matrix operands; Q_Mn00 is 4 * n for
 * matrices 0..7.
 */
#define  Q_M000  0	/* First digit specifies matrix */
#define  Q_M100  4
#define  Q_M200  8
#define  Q_M300  12
#define  Q_M400  16
#define  Q_M500  20
#define  Q_M600  24
#define  Q_M700  28

/*
 * Q_En00 is Q_Mn00 + 32.
 * NOTE(review): presumably the transposed view of the same matrix -- confirm.
 */
#define  Q_E000  32
#define  Q_E100  36
#define  Q_E200  40
#define  Q_E300  44
#define  Q_E400  48
#define  Q_E500  52
#define  Q_E600  56
#define  Q_E700  60


/*
+-------------+------------+---------+---------------------------------------+
|31         26|25        21|20     16|15                                   0 |
+-------------+------------+---------+---------------------------------------+
| opcode 0x8c |  base[4-0] | rt[4-0] |             offset[15-0]              |
+-------------+------------+---------+---------------------------------------+

  LoadWord Relative to Address in General Purpose Register

    lw %rt, offset(%base)

	%rt:	GPR Target Register (0...31)
	%base:	GPR, specifies Source Address Base
	offset:	signed Offset added to Source Address Base

    %rt <- word_at_address (offset + %base)

  The macro yields the 32-bit instruction word as an integer expression;
  the offset is truncated to its low 16 bits.
*/
#define lw(rt,offset,base) \
	(0x8c000000 | ((base) << 21) | ((rt) << 16) | ((offset) & 0xffff))


/*
+-------------+------------+---------+---------------------------------------+
|31         26|25        21|20     16|15                                   0 |
+-------------+------------+---------+---------------------------------------+
| opcode 0xac |  base[4-0] | rt[4-0] |             offset[15-0]              |
+-------------+------------+---------+---------------------------------------+

  StoreWord Relative to Address in General Purpose Register

    sw %rt, offset(%base)

	%rt:	GPR Register holding the word to store (0...31)
	%base:	GPR, specifies Destination Address Base
	offset:	signed Offset added to Destination Address Base

    word_at_address (offset + %base) <- %rt

  The macro yields the 32-bit instruction word as an integer expression;
  the offset is truncated to its low 16 bits.
*/
#define sw(rt,offset,base) \
	(0xac000000 | ((base) << 21) | ((rt) << 16) | ((offset) & 0xffff))


/*
+-------------+------------+---------+---------------------------------------+
|31         26|25        21|20     16|15                                   0 |
+-------------+------------+---------+---------------------------------------+
| opcode 0x24 |   rs[4-0]  | rt[4-0] |              immediate                |
+-------------+------------+---------+---------------------------------------+

  Add Immediate Unsigned Word

    addiu %rt, %rs, immediate

	%rt:		GPR Target Register (0...31)
	%rs:		GPR Source Register (0...31)
	immediate:	value added to Source Register

    %rt <- %rs + sign_extended(immediate)

  The macro yields the 32-bit instruction word as an integer expression;
  the immediate is truncated to its low 16 bits.
*/
#define addiu(rt,rs,immediate) \
	(0x24000000 | ((rs) << 21) | ((rt) << 16) | ((immediate) & 0xffff))


/*
+-------------+-----------+---------+----------------------------+-----+-----+
|31         26|25       21|20     16|15                        2 |  1  |  0  |
+-------------+-----------+---------+----------------------------+-----+-----+
| opcode 0xd8 | base[4-0] | vt[4-0] |         offset[15-2]       |  0  |vt[5]|
+-------------+-----------+---------+----------------------------+-----+-----+

  LoadVector.Quadword Relative to Address in General Purpose Register
  Final Address needs to be 64-byte aligned.
  NOTE(review): vfpu_save_regs() in the test program uses the matching store
  with 16-byte strides, so the real requirement is probably 16-byte
  (quadword) alignment -- confirm.

    lv.q %vfpu_rt, offset(%base)

	%vfpu_rt:	VFPU Vector Target Register (column0-31/row32-63)
	%base:		GPR, specifies Source Address Base
	offset:		signed Offset in 32-bit words (the macro multiplies it
			by 4), added to Source Address Base
	cache_policy:	written to bit 1; the diagram above shows that bit
			as 0 for loads, so pass 0

    vfpu_rt <- vector_at_address (offset * 4 + %base)

  Bit 5 of the register number goes to bit 0 of the instruction word and the
  low five bits go to bits 20-16.  The scaled offset is masked to bits 15-2
  so that negative offsets cannot spill into the register/opcode fields.
*/
#define lv_q(vfpu_rt,offset,base,cache_policy)			\
	(0xd8000000 |						\
	 ((base) << 21) |					\
	 (((vfpu_rt) & 0x1f) << 16) | (((vfpu_rt) >> 5) & 1) |	\
	 (((offset) * 4) & 0xfffc) |				\
	 (((cache_policy) & 1) << 1))


/*
+-------------+-----------+---------+----------------------------+-----+-----+
|31         26|25       21|20     16|15                        2 |  1  |  0  |
+-------------+-----------+---------+----------------------------+-----+-----+
| opcode 0xf8 | base[4-0] | vt[4-0] |         offset[15-2]       | c_p |vt[5]|
+-------------+-----------+---------+----------------------------+-----+-----+

  StoreVector.Quadword Relative to Address in General Purpose Register
  Final Address needs to be 64-byte aligned.
  NOTE(review): vfpu_save_regs() in the test program stores with 16-byte
  strides, so the real requirement is probably 16-byte (quadword)
  alignment -- confirm.

    sv.q %vfpu_rt, offset(%base), cache_policy

	%vfpu_rt:	VFPU Vector Register to store (column0-31/row32-63)
	%base:		GPR, specifies Destination Address Base
	offset:		signed Offset in 32-bit words (the macro multiplies it
			by 4), added to Destination Address Base
	cache_policy:	0 = write-through, 1 = write-back

    vector_at_address (offset * 4 + %base) <- vfpu_rt

  Bit 5 of the register number goes to bit 0 of the instruction word and the
  low five bits go to bits 20-16.  Shifting the register number right by 4
  instead of 5 would leak register bit 4 into bit 0, and for registers
  32..63 also set the cache-policy bit.  The scaled offset is masked to
  bits 15-2 so that negative offsets cannot spill into the other fields.
*/
#define sv_q(vfpu_rt,offset,base,cache_policy)			\
	(0xf8000000 |						\
	 ((base) << 21) |					\
	 (((vfpu_rt) & 0x1f) << 16) | (((vfpu_rt) >> 5) & 1) |	\
	 (((offset) * 4) & 0xfffc) |				\
	 (((cache_policy) & 1) << 1))


/*
+-------------------------------------------------------------+--------------+
|31                                                         7 | 6         0  |
+-------------------------------------------------------------+--------------+
|              opcode 0xd0060000                              | vfpu_rt[6-0] |
+-------------------------------------------------------------+--------------+

  SetVectorZero.Single/Pair/Triple/Quad

    vzero.s %vfpu_rt	; Set 1 Vector Component to 0.0f
    vzero.p %vfpu_rt	; Set 2 Vector Components to 0.0f
    vzero.t %vfpu_rt	; Set 3 Vector Components to 0.0f
    vzero.q %vfpu_rt	; Set 4 Vector Components to 0.0f

	%vfpu_rt:	VFPU Vector Target Register ([s|p|t|q]reg 0..127)

    vfpu_regs[%vfpu_rt] <- 0.0f

  The four forms differ only in bits 15 and 7 of the opcode:
  .s = neither, .p = bit 7, .t = bit 15, .q = both.
*/
#define vzero_s(vfpu_rt)  (0xd0060000 | (vfpu_rt))
#define vzero_p(vfpu_rt)  (0xd0060080 | (vfpu_rt))
#define vzero_t(vfpu_rt)  (0xd0068000 | (vfpu_rt))
#define vzero_q(vfpu_rt)  (0xd0068080 | (vfpu_rt))


/*
+-------------------------------------------------------------+--------------+
|31                                                         7 | 6         0  |
+-------------------------------------------------------------+--------------+
|              opcode 0xd0070000                              | vfpu_rt[6-0] |
+-------------------------------------------------------------+--------------+

  SetVectorOne.Single/Pair/Triple/Quad

    vone.s %vfpu_rt	; Set 1 Vector Component to 1.0f
    vone.p %vfpu_rt	; Set 2 Vector Components to 1.0f
    vone.t %vfpu_rt	; Set 3 Vector Components to 1.0f
    vone.q %vfpu_rt	; Set 4 Vector Components to 1.0f

	%vfpu_rt:	VFPU Vector Target Register ([s|p|t|q]reg 0..127)

    vfpu_regs[%vfpu_rt] <- 1.0f

  The four forms differ only in bits 15 and 7 of the opcode:
  .s = neither, .p = bit 7, .t = bit 15, .q = both.
*/
#define vone_s(vfpu_rt)  (0xd0070000 | (vfpu_rt))
#define vone_p(vfpu_rt)  (0xd0070080 | (vfpu_rt))
#define vone_t(vfpu_rt)  (0xd0078000 | (vfpu_rt))
#define vone_q(vfpu_rt)  (0xd0078080 | (vfpu_rt))


/*
+-------------------------------------------------------------+--------------+
|31                                                         7 | 6         0  |
+-------------------------------------------------------------+--------------+
|              opcode 0xf3868080                              | vfpu_rt[6-0] |
+-------------------------------------------------------------+--------------+

  SetMatrixZero.Pair/Triple/Quad

    vmzero.p %vfpu_rt	; Set 2x2 Submatrix to 0.0f
    vmzero.t %vfpu_rt	; Set 3x3 Submatrix to 0.0f
    vmzero.q %vfpu_rt	; Set 4x4 Matrix to 0.0f

	%vfpu_rt:	VFPU Matrix Target Register ([p|t|q]reg 0..127)

    vfpu_mtx[%vfpu_rt] <- 0.0f

  The opcode in the diagram is the .q form; the forms differ only in
  bits 15 and 7: .p = bit 7, .t = bit 15, .q = both.
*/
#define vmzero_p(vfpu_rt)  (0xf3860080 | (vfpu_rt))
#define vmzero_t(vfpu_rt)  (0xf3868000 | (vfpu_rt))
#define vmzero_q(vfpu_rt)  (0xf3868080 | (vfpu_rt))


/*
+-------------------------------------------------------------+--------------+
|31                                                         7 | 6         0  |
+-------------------------------------------------------------+--------------+
|              opcode 0xf3838080                              | vfpu_rt[6-0] |
+-------------------------------------------------------------+--------------+

  SetMatrixIdentity.Pair/Triple/Quad

    vmidt.p %vfpu_rt	; Set 2x2 Submatrix to Identity
    vmidt.t %vfpu_rt	; Set 3x3 Submatrix to Identity
    vmidt.q %vfpu_rt	; Set 4x4 Matrix to Identity

	%vfpu_rt:	VFPU Matrix Target Register ([p|t|q]reg 0..127)

    vfpu_mtx[%vfpu_rt] <- identity matrix

  The opcode in the diagram is the .q form; the forms differ only in
  bits 15 and 7: .p = bit 7, .t = bit 15, .q = both.
*/
#define vmidt_p(vfpu_rt)  (0xf3830080 | (vfpu_rt))
#define vmidt_t(vfpu_rt)  (0xf3838000 | (vfpu_rt))
#define vmidt_q(vfpu_rt)  (0xf3838080 | (vfpu_rt))


/*
 * helpers for direct __asm__ use:
 *
 *   cgen_stringify(x)  expands x as a macro first, then turns the result
 *                      into a string literal (the two-level indirection is
 *                      what forces the expansion).
 *   cgen_asm(x)        builds one chunk of assembler text: a ".loc 1 <line> 0"
 *                      marker for the current source line followed by a
 *                      ".word" directive carrying the expanded expression x.
 *                      Adjacent cgen_asm() strings concatenate into a single
 *                      __asm__ template.
 */
#define _cgen_stringify(x)  #x
#define cgen_stringify(x)   _cgen_stringify(x)
#define cgen_asm(x)         ".loc 1 " cgen_stringify(__LINE__) " 0\n\t.word " cgen_stringify(x) "\n\t"

#endif


main.c:

Code: Select all

#include <pspkernel.h>
#include <pspdebug.h>
#include <pspctrl.h>
#include <pspdisplay.h>
#include "codegen.h"

/* XXX SDK BUG&#58; In theory everything should work when main is running in userspace.
   Unfortunately the PSP hangs if we register the exception handler in the _init constructor, so we need to
   call pspDebugInstallErrorHandler&#40;&#41; in main&#40;&#41;.
 */
PSP_MAIN_THREAD_ATTR&#40;/*PSP_THREAD_ATTR_USER |*/ PSP_THREAD_ATTR_VFPU&#41;;
PSP_MODULE_INFO&#40;"VFPU-test", 0x1000, 1, 1&#41;;


/* Exit callback: terminates the program via sceKernelExitGame(); the arguments are unused. */
static int exit_callback(int arg1, int arg2, void *common)
{
	sceKernelExitGame();
	return 0;
}


/*
 * Thread body: creates the "Exit Callback" callback, registers it as the
 * exit callback and then sleeps with callback processing enabled.
 */
static int callback_thread (SceSize args, void *argp)
{
	int cbid = sceKernelCreateCallback("Exit Callback", exit_callback, NULL);

	sceKernelRegisterExitCallback(cbid);
	sceKernelSleepThreadCB();
	return 0;
}


/* Sets up the callback thread and returns its thread id */
static void setup_callbacks &#40;void&#41; __attribute__&#40;&#40;constructor&#41;&#41;;
static void setup_callbacks &#40;void&#41;
&#123;
	int thid = sceKernelCreateThread&#40;"update_thread", callback_thread, 0x11, 0xFA0, THREAD_ATTR_USER, 0&#41;;

	if &#40;thid >= 0&#41;
		sceKernelStartThread&#40;thid, 0, 0&#41;;
&#125;


/* Destructor: calls sceKernelExitGame() when the program is torn down. */
static void back_to_kernel (void) __attribute__((destructor));
static void back_to_kernel (void)
{
	sceKernelExitGame();
}


/*
 * Exception handler installed from main(): re-initialises the debug screen
 * with its own colours and dumps the register block of the faulting context.
 */
static void exception_handler (PspDebugRegBlock *regs)
{
	pspDebugScreenInit();
	pspDebugScreenSetBackColor(0x00FF0000);
	pspDebugScreenSetTextColor(0xFFFFFFFF);
	pspDebugScreenClear();
	pspDebugScreenPrintf("Exception Details:\n");
	pspDebugDumpException(regs);
}


void vfpu_init &#40;void&#41;
&#123;
        __asm__ volatile &#40;
		cgen_asm&#40;vmzero_q&#40;Q_M000&#41;&#41;	/* access register array as matrices for speed */
		cgen_asm&#40;vmzero_q&#40;Q_M100&#41;&#41;
		cgen_asm&#40;vmzero_q&#40;Q_M200&#41;&#41;
		cgen_asm&#40;vmzero_q&#40;Q_M300&#41;&#41;
		cgen_asm&#40;vmzero_q&#40;Q_M400&#41;&#41;
		cgen_asm&#40;vmzero_q&#40;Q_M500&#41;&#41;
		cgen_asm&#40;vmzero_q&#40;Q_M600&#41;&#41;
		cgen_asm&#40;vmzero_q&#40;Q_M700&#41;&#41;
	&#41;;
&#125;


void vfpu_save_regs &#40;float vfpu_regs &#91;32&#93;&#91;4&#93;&#41;
&#123;
        register void *ptr __asm__ &#40;"a0"&#41; = vfpu_regs;
        __asm__ volatile &#40;
		cgen_asm&#40;sv_q&#40;0, 0 * 4, R_a0, 0&#41;&#41;
		cgen_asm&#40;sv_q&#40;1, 1 * 4, R_a0, 0&#41;&#41;
		cgen_asm&#40;sv_q&#40;2, 2 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;3, 3 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;4, 4 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;5, 5 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;6, 6 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;7, 7 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;8, 8 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;9, 9 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;10, 10 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;11, 11 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;12, 12 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;13, 13 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;14, 14 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;15, 15 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;16, 16 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;17, 17 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;18, 18 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;19, 19 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;20, 20 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;21, 21 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;22, 22 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;23, 23 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;24, 24 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;25, 25 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;26, 26 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;27, 27 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;28, 28 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;29, 29 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;30, 30 * 4, R_a0, 0&#41;&#41;
                cgen_asm&#40;sv_q&#40;31, 31 * 4, R_a0, 0&#41;&#41;
		&#58; "=r"&#40;ptr&#41; &#58; "r"&#40;ptr&#41; &#58; "memory"&#41;;
&#125;


/*
 * Prints every row of `shown` that differs from the same row of `other` in
 * at least one of its four components, using `fmt` (row index followed by
 * four floats).
 */
static void vfpu_print_changed_rows (const char *fmt, float shown [32][4], float other [32][4])
{
	int i, j;

	for (i=0; i<32; i++) {
		/* stop at the first differing component; j<4 afterwards means "row changed" */
		for (j=0; j<4; j++) {
			if (shown[i][j] != other[i][j])
				break;
		}
		if (j<4)
			pspDebugScreenPrintf(fmt, i, shown[i][0], shown[i][1], shown[i][2], shown[i][3]);
	}
}


/*
 * Prints a diff of two register snapshots on the debug screen: first all
 * changed rows of r1 prefixed with "-", then the same rows of r2 prefixed
 * with "+".
 */
void vfpu_diff (float r1 [32][4], float r2 [32][4])
{
	vfpu_print_changed_rows("- %i: % 5.5f % 5.5f % 5.5f % 5.5f\n", r1, r2);
	vfpu_print_changed_rows("+ %i: % 5.5f % 5.5f % 5.5f % 5.5f\n", r2, r1);
}


/* Snapshots of VFPU registers 0..31 taken before (regs0) and after (regs1) the testcase. */
static float vfpu_regs0 [32][4] __attribute__((aligned(64)));
static float vfpu_regs1 [32][4] __attribute__((aligned(64)));

/**
 *  ok... this function is the place to actually try the behaviour of some yet-unknown instructions.
 */
void vfpu_testcase &#40;void&#41;
&#123;
	__asm__&#40;cgen_asm&#40;vmidt_q&#40;Q_M100&#41;&#41;&#41;;
&#125;


int main &#40;int argc, char **argv&#41;
&#123;
	pspDebugInstallErrorHandler&#40;exception_handler&#41;;
	sceCtrlSetSamplingCycle&#40;0&#41;;
	sceCtrlSetSamplingMode&#40;PSP_CTRL_MODE_DIGITAL&#41;;

	pspDebugScreenInit&#40;&#41;;
	pspDebugScreenPrintf&#40;"VFPU test  --  vfpu_regs0 = %p, vfpu_regs1 = %p\n\n", vfpu_regs0, vfpu_regs1&#41;;
	pspDebugScreenPrintf&#40;"press O to run VFPU testcase or X to trap into breakpoint\n\n"&#41;;

	vfpu_init&#40;&#41;;

	while &#40;1&#41; &#123;
		SceCtrlData pad;

		sceCtrlReadBufferPositive&#40;&pad, 1&#41;;

		if &#40;pad.Buttons & PSP_CTRL_CIRCLE&#41; &#123;
			vfpu_save_regs&#40;vfpu_regs0&#41;;
			vfpu_testcase&#40;&#41;;
			vfpu_save_regs&#40;vfpu_regs1&#41;;
			vfpu_diff&#40;vfpu_regs0, vfpu_regs1&#41;;
		&#125;

		if &#40;pad.Buttons & PSP_CTRL_CROSS&#41;
			asm&#40;"break\n"&#41;; /* Cause a break exception, to check that the exception handler works... */

		sceDisplayWaitVblankStart&#40;&#41;;
	&#125;

	return 0;
&#125;

compile and install by typing "make install".
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

Thanks for the info holger. Im gonna try a few tests myself, Ill post any new findings as I go on.
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

this stuff is now in SVN: see pspgl/test-vfpu/ and pspgl/pspgl_codegen.h

http://svn.pspdev.org/filedetails.php?r ... rev=0&sc=1
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

I *think* i got vmmul working.

Code: Select all

/*
+--------------------------+--------------+--+--------------+-+--------------+
|31                     23 | 22        16 |15| 14         8 |7| 6         0  |
+--------------------------+--------------+--+--------------+-+--------------+
| opcode 0xf0000080 (p)    | vfpu_rt[6-0] | 0| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
| opcode 0xf0008000 (t)    | vfpu_rt[6-0] | 1| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xf0008080 (q)    | vfpu_rt[6-0] | 1| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
+--------------------------+--------------+--+--------------+-+--------------+

	vmmul.p %vfpu_rd, %vfpu_rs, %vfpu_rt   ; multiply 2 2x2 Submatrices
	vmmul.t %vfpu_rd, %vfpu_rs, %vfpu_rt   ; multiply 2 3x3 Submatrices
	vmmul.q %vfpu_rd, %vfpu_rs, %vfpu_rt   ; multiply 2 4x4 Matrices

	Every macro argument is parenthesised before shifting so that
	expression arguments (e.g. "base | 1") land in the intended field.
*/

#define vmmul_p(vfpu_rd, vfpu_rs, vfpu_rt) (0xf0000080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vmmul_t(vfpu_rd, vfpu_rs, vfpu_rt) (0xf0008000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vmmul_q(vfpu_rd, vfpu_rs, vfpu_rt) (0xf0008080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
please test though, im very tired at the moment =)

working on vsin/vcos next, that might help resolve a lot of the other opcodes.
User avatar
dot_blank
Posts: 498
Joined: Wed Sep 28, 2005 8:47 am
Location: Brasil

Post by dot_blank »

excellent documentation and
analysis of vfpu holger ;)
this is (imho) the single most
important coprocessor in the psp

you are doing a terrific work
of documenting registers and
instruction set (i especially like
your commenting ... mantissa and all :)

one question: could be usefull
to use gdb with your vfpu test ;)
10011011 00101010 11010111 10001001 10111010
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

MrMr[iCE] wrote: I *think* i got vmmul working.
applied.
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

one question: could be usefull
to use gdb with your vfpu test ;)
I have no idea what would need to get done to implement VFPU support in gdb, and the current quick'n'dirty approach works with surprisingly fast turnaround cycles... on the long run full VFPU support in the toolchain, assembler and debugger would be definitely cool.
Warren
Posts: 175
Joined: Sat Jan 24, 2004 8:26 am
Location: San Diego, CA

Post by Warren »

holger wrote: I have no idea what would need to get done to implement VFPU support in gdb, and the current quick'n'dirty approach works with surprisingly fast turnaround cycles... on the long run full VFPU support in the toolchain, assembler and debugger would be definitely cool.
Tyranid has already added support for VFPU instrs in gdb. I'm unfamiliar with GAS so I really have no clue how to add instrs to it properly or else i already would have done so. I might experiment over the next couple days though.
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

btw, has anybody of you seen some hints how one may catch VFPU exceptions? Or, even better, disable or avoid them at all?

When loading bitfields using the lv.q insn, e.g. the (unsigned long) testvector in the SVN testcase, it seems that in some cases an exception is thrown when the number is NaN and the PSP then locks up. But since there is a way to convert byte, short and integer numbers to float, there also must be some way to load them...
mrbrown
Site Admin
Posts: 1537
Joined: Sat Jan 17, 2004 11:24 am

Post by mrbrown »

Warren wrote:Tyranid has already added support for VFPU instrs in gdb. I'm unfamiliar with GAS so I really have no clue how to add instrs to it properly or else i already would have done so. I might experiment over the next couple days though.
Does the GDB stub support VFPU registers? If not, single-stepping VFPU instructions is pretty much useless.
Warren
Posts: 175
Joined: Sat Jan 24, 2004 8:26 am
Location: San Diego, CA

Post by Warren »

mrbrown wrote:Does the GDB stub support VFPU registers? If not, single-stepping VFPU instructions is pretty much useless.
It currently does not support dumping VFPU registers.
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

some more functions that ive tested:

Code: Select all

/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0100000 (s)                   | 0| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0100080 (p)                   | 0| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
| opcode 0xd0108000 (t)                   | 1| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0108080 (q)                   | 1| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
+-----------------------------------------+--+--------------+-+--------------+

	Reciprocal.Single/Pair/Triple/Quad

	vrcp.s  %vfpu_rd, %vfpu_rs   ; calculate reciprocal (1/z) on single
	vrcp.p  %vfpu_rd, %vfpu_rs   ; calculate reciprocal (1/z) on pair
	vrcp.t  %vfpu_rd, %vfpu_rs   ; calculate reciprocal (1/z) on triple
	vrcp.q  %vfpu_rd, %vfpu_rs   ; calculate reciprocal (1/z) on quad

	%vfpu_rd:   VFPU Vector Target Register ([s|p|t|q]reg 0..127)
	%vfpu_rs:   VFPU Vector Source Register ([s|p|t|q]reg 0..127)

	vfpu_regs[%vfpu_rd] <- 1.0 / vfpu_regs[%vfpu_rs]

	The source register is parenthesised before shifting so that
	expression arguments land in the intended field.
*/

#define vrcp_s(vfpu_rd, vfpu_rs) (0xd0100000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrcp_p(vfpu_rd, vfpu_rs) (0xd0100080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrcp_t(vfpu_rd, vfpu_rs) (0xd0108000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrcp_q(vfpu_rd, vfpu_rs) (0xd0108080 | ((vfpu_rs) << 8) | (vfpu_rd))


/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0140000 (s)                   | 0| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0140080 (p)                   | 0| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
| opcode 0xd0148000 (t)                   | 1| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0148080 (q)                   | 1| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
+-----------------------------------------+--+--------------+-+--------------+

	Exp2.Single/Pair/Triple/Quad (calculate 2 raised to the specified real number)

	vexp2.s  %vfpu_rd, %vfpu_rs   ; calculate 2 ** y
	vexp2.p  %vfpu_rd, %vfpu_rs   ; calculate 2 ** y
	vexp2.t  %vfpu_rd, %vfpu_rs   ; calculate 2 ** y
	vexp2.q  %vfpu_rd, %vfpu_rs   ; calculate 2 ** y

	%vfpu_rd:   VFPU Vector Target Register ([s|p|t|q]reg 0..127)
	%vfpu_rs:   VFPU Vector Source Register ([s|p|t|q]reg 0..127)

	vfpu_regs[%vfpu_rd] <- 2^(vfpu_regs[%vfpu_rs])

	The source register is parenthesised before shifting so that
	expression arguments land in the intended field.
*/

#define vexp2_s(vfpu_rd, vfpu_rs) (0xd0140000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vexp2_p(vfpu_rd, vfpu_rs) (0xd0140080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vexp2_t(vfpu_rd, vfpu_rs) (0xd0148000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vexp2_q(vfpu_rd, vfpu_rs) (0xd0148080 | ((vfpu_rs) << 8) | (vfpu_rd))


/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0150000 (s)                   | 0| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0150080 (p)                   | 0| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
| opcode 0xd0158000 (t)                   | 1| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0158080 (q)                   | 1| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
+-----------------------------------------+--+--------------+-+--------------+

	Log2.Single/Pair/Triple/Quad (calculate logarithm base 2 of the specified real number)

	vlog2.s  %vfpu_rd, %vfpu_rs
	vlog2.p  %vfpu_rd, %vfpu_rs
	vlog2.t  %vfpu_rd, %vfpu_rs
	vlog2.q  %vfpu_rd, %vfpu_rs

	%vfpu_rd:   VFPU Vector Target Register ([s|p|t|q]reg 0..127)
	%vfpu_rs:   VFPU Vector Source Register ([s|p|t|q]reg 0..127)

	vfpu_regs[%vfpu_rd] <- log2(vfpu_regs[%vfpu_rs])

	The source register is parenthesised before shifting so that
	expression arguments land in the intended field.
*/

#define vlog2_s(vfpu_rd, vfpu_rs) (0xd0150000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vlog2_p(vfpu_rd, vfpu_rs) (0xd0150080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vlog2_t(vfpu_rd, vfpu_rs) (0xd0158000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vlog2_q(vfpu_rd, vfpu_rs) (0xd0158080 | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0160000 (s)                   | 0| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0160080 (p)                   | 0| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
| opcode 0xd0168000 (t)                   | 1| vfpu_rs[6-0] |0| vfpu_rd[6-0] |
| opcode 0xd0168080 (q)                   | 1| vfpu_rs[6-0] |1| vfpu_rd[6-0] |
+-----------------------------------------+--+--------------+-+--------------+

	SquareRoot.Single/Pair/Triple/Quad

	vsqrt.s  %vfpu_rd, %vfpu_rs   ; calculate square root
	vsqrt.p  %vfpu_rd, %vfpu_rs   ; calculate square root
	vsqrt.t  %vfpu_rd, %vfpu_rs   ; calculate square root
	vsqrt.q  %vfpu_rd, %vfpu_rs   ; calculate square root

	%vfpu_rd:   VFPU Vector Target Register ([s|p|t|q]reg 0..127)
	%vfpu_rs:   VFPU Vector Source Register ([s|p|t|q]reg 0..127)

	vfpu_regs[%vfpu_rd] <- sqrt(vfpu_regs[%vfpu_rs])

	The source register is parenthesised before shifting so that
	expression arguments land in the intended field.
*/

#define vsqrt_s(vfpu_rd, vfpu_rs) (0xd0160000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsqrt_p(vfpu_rd, vfpu_rs) (0xd0160080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsqrt_t(vfpu_rd, vfpu_rs) (0xd0168000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsqrt_q(vfpu_rd, vfpu_rs) (0xd0168080 | ((vfpu_rs) << 8) | (vfpu_rd))
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

and here comes another run of ops ive tested:

Code: Select all

/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0110000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0110080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0118000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0118080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	ReciprocalSquareRoot.Single/Pair/Triple/Quad

	vrsq.s  %vfpu_rd, %vfpu_rs   ; calculate reciprocal sqrt &#40;1/sqrt&#40;x&#41;&#41; on single
	vrsq.p  %vfpu_rd, %vfpu_rs   ; calculate reciprocal sqrt &#40;1/sqrt&#40;x&#41;&#41; on pair
	vrsq.t  %vfpu_rd, %vfpu_rs   ; calculate reciprocal sqrt &#40;1/sqrt&#40;x&#41;&#41; on triple
	vrsq.q  %vfpu_rd, %vfpu_rs   ; calculate reciprocal sqrt &#40;1/sqrt&#40;x&#41;&#41; on quad

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs&#91;%vfpu_rd&#93; <- 1.0 / sqrt&#40;vfpu_regs&#91;%vfpu_rs&#93;&#41;
*/

/* Instruction-word encoders for vrsq.{s,p,t,q}: opcode constant | (source << 8) | destination.
   Every argument is parenthesized so compound expressions are encoded correctly. */
#define vrsq_s(vfpu_rd, vfpu_rs) (0xd0110000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrsq_p(vfpu_rd, vfpu_rs) (0xd0110080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrsq_t(vfpu_rd, vfpu_rs) (0xd0118000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrsq_q(vfpu_rd, vfpu_rs) (0xd0118080 | ((vfpu_rs) << 8) | (vfpu_rd))


/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0120000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0120080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0128000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0128080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	Sinus.Single/Pair/Triple/Quad

	vsin.s  %vfpu_rd, %vfpu_rs   ; calculate sin on single
	vsin.p  %vfpu_rd, %vfpu_rs   ; calculate sin on pair
	vsin.t  %vfpu_rd, %vfpu_rs   ; calculate sin on triple
	vsin.q  %vfpu_rd, %vfpu_rs   ; calculate sin on quad

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs&#91;%vfpu_rd&#93; <- sin&#40;vfpu_regs&#91;%vfpu_rs&#93;&#41;

	Note by John Kelley&#58; trig functions on the vfpu expect input values
	like vsin&#40;degrees/90&#41; or vsin&#40;2/PI * radians&#41;
*/


/* Instruction-word encoders for vsin.{s,p,t,q}: opcode constant | (source << 8) | destination.
   Every argument is parenthesized so compound expressions are encoded correctly. */
#define vsin_s(vfpu_rd, vfpu_rs) (0xd0120000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsin_p(vfpu_rd, vfpu_rs) (0xd0120080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsin_t(vfpu_rd, vfpu_rs) (0xd0128000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsin_q(vfpu_rd, vfpu_rs) (0xd0128080 | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0130000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0130080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0138000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0138080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	Cosine.Single/Pair/Triple/Quad

	vcos.s  %vfpu_rd, %vfpu_rs   ; calculate cos on single
	vcos.p  %vfpu_rd, %vfpu_rs   ; calculate cos on pair
	vcos.t  %vfpu_rd, %vfpu_rs   ; calculate cos on triple
	vcos.q  %vfpu_rd, %vfpu_rs   ; calculate cos on quad

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs&#91;%vfpu_rd&#93; <- cos&#40;vfpu_regs&#91;%vfpu_rs&#93;&#41;

	Note by John Kelley&#58; trig functions on the vfpu expect input values
	like vsin&#40;degrees/90&#41; or vsin&#40;2/PI * radians&#41;
*/

/* Instruction-word encoders for vcos.{s,p,t,q}: opcode constant | (source << 8) | destination.
   Every argument is parenthesized so compound expressions are encoded correctly. */
#define vcos_s(vfpu_rd, vfpu_rs) (0xd0130000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vcos_p(vfpu_rd, vfpu_rs) (0xd0130080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vcos_t(vfpu_rd, vfpu_rs) (0xd0138000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vcos_q(vfpu_rd, vfpu_rs) (0xd0138080 | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0170000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0170080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0178000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0178080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	ArcSin.Single/Pair/Triple/Quad

	vasin.s  %vfpu_rd, %vfpu_rs   ; calculate arcsin
	vasin.p  %vfpu_rd, %vfpu_rs   ; calculate arcsin
	vasin.t  %vfpu_rd, %vfpu_rs   ; calculate arcsin
	vasin.q  %vfpu_rd, %vfpu_rs   ; calculate arcsin

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs&#91;%vfpu_rd&#93; <- arcsin&#40;vfpu_regs&#91;%vfpu_rs&#93;&#41;
*/

/* Instruction-word encoders for vasin.{s,p,t,q}: opcode constant | (source << 8) | destination.
   Every argument is parenthesized so compound expressions are encoded correctly. */
#define vasin_s(vfpu_rd, vfpu_rs) (0xd0170000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vasin_p(vfpu_rd, vfpu_rs) (0xd0170080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vasin_t(vfpu_rd, vfpu_rs) (0xd0178000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vasin_q(vfpu_rd, vfpu_rs) (0xd0178080 | ((vfpu_rs) << 8) | (vfpu_rd))


/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd0180000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0180080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0188000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd0188080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	NegativeReciprocal.Single/Pair/Triple/Quad

	vnrcp.s  %vfpu_rd, %vfpu_rs   ; calculate negative reciprocal
	vnrcp.p  %vfpu_rd, %vfpu_rs   ; calculate negative reciprocal
	vnrcp.t  %vfpu_rd, %vfpu_rs   ; calculate negative reciprocal
	vnrcp.q  %vfpu_rd, %vfpu_rs   ; calculate negative reciprocal

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs&#91;%vfpu_rd&#93; <- -1/vfpu_regs&#91;%vfpu_rs&#93;
*/

/* Instruction-word encoders for vnrcp.{s,p,t,q}: opcode constant | (source << 8) | destination.
   Every argument is parenthesized so compound expressions are encoded correctly. */
#define vnrcp_s(vfpu_rd, vfpu_rs) (0xd0180000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vnrcp_p(vfpu_rd, vfpu_rs) (0xd0180080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vnrcp_t(vfpu_rd, vfpu_rs) (0xd0188000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vnrcp_q(vfpu_rd, vfpu_rs) (0xd0188080 | ((vfpu_rs) << 8) | (vfpu_rd))


/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd01a0000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01a0080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01a8000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01a8080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	NegativeSin.Single/Pair/Triple/Quad

	vnsin.s  %vfpu_rd, %vfpu_rs   ; calculate negative sin
	vnsin.p  %vfpu_rd, %vfpu_rs   ; calculate negative sin
	vnsin.t  %vfpu_rd, %vfpu_rs   ; calculate negative sin
	vnsin.q  %vfpu_rd, %vfpu_rs   ; calculate negative sin

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs[%vfpu_rd] <- -sin(vfpu_regs[%vfpu_rs])
*/

/* Instruction-word encoders for vnsin.{s,p,t,q}: opcode constant | (source << 8) | destination.
   Every argument is parenthesized so compound expressions are encoded correctly. */
#define vnsin_s(vfpu_rd, vfpu_rs) (0xd01a0000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vnsin_p(vfpu_rd, vfpu_rs) (0xd01a0080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vnsin_t(vfpu_rd, vfpu_rs) (0xd01a8000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vnsin_q(vfpu_rd, vfpu_rs) (0xd01a8080 | ((vfpu_rs) << 8) | (vfpu_rd))


/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd01c0000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01c0080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01c8000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01c8080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	ReciprocalExp2.Single/Pair/Triple/Quad

	vrexp2.s  %vfpu_rd, %vfpu_rs   ; calculate 1/&#40;2^y&#41;
	vrexp2.p  %vfpu_rd, %vfpu_rs   ; calculate 1/&#40;2^y&#41;
	vrexp2.t  %vfpu_rd, %vfpu_rs   ; calculate 1/&#40;2^y&#41;
	vrexp2.q  %vfpu_rd, %vfpu_rs   ; calculate 1/&#40;2^y&#41;

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs&#91;%vfpu_rd&#93; <- 1/exp2&#40;vfpu_regs&#91;%vfpu_rs&#93;&#41;
*/

/* Instruction-word encoders for vrexp2.{s,p,t,q}: opcode constant | (source << 8) | destination.
   Every argument is parenthesized so compound expressions are encoded correctly. */
#define vrexp2_s(vfpu_rd, vfpu_rs) (0xd01c0000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrexp2_p(vfpu_rd, vfpu_rs) (0xd01c0080 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrexp2_t(vfpu_rd, vfpu_rs) (0xd01c8000 | ((vfpu_rs) << 8) | (vfpu_rd))
#define vrexp2_q(vfpu_rd, vfpu_rs) (0xd01c8080 | ((vfpu_rs) << 8) | (vfpu_rd))
Please note that the sin/cos functions expect scaled input values. John Kelly pointed out that you should pass in values like (degrees/90.0) or (2/PI * radians).
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

Code: Select all

/*
+-----------------------------------------+--+--------------+-+--------------+
|31                                    16 |15| 14         8 |7| 6         0  |
+-----------------------------------------+--+--------------+-+--------------+
| opcode 0xd01a0000 &#40;s&#41;                   | 0| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01a0080 &#40;p&#41;                   | 0| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01a8000 &#40;t&#41;                   | 1| vfpu_rs&#91;6-0&#93; |0| vfpu_rd&#91;6-0&#93; |
| opcode 0xd01a8080 &#40;q&#41;                   | 1| vfpu_rs&#91;6-0&#93; |1| vfpu_rd&#91;6-0&#93; |
+-----------------------------------------+--+--------------+-+--------------+

	NegativeSin.Single/Pair/Triple/Quad

	vnsin.s  %vfpu_rd, %vfpu_rs   ; calculate negative sin
	vnsin.p  %vfpu_rd, %vfpu_rs   ; calculate negative sin
	vnsin.t  %vfpu_rd, %vfpu_rs   ; calculate negative sin
	vnsin.q  %vfpu_rd, %vfpu_rs   ; calculate negative sin

	%vfpu_rd&#58;   VFPU Vector Target Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
	%vfpu_rs&#58;   VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

	vfpu_regs&#91;%vfpu_rd&#93; <- sqrt&#40;vfpu_regs&#91;%vfpu_rs&#93;&#41;
*/
are you sure about this one? Shouldn't this read

Code: Select all

	vfpu_regs&#91;%vfpu_rd&#93; <- -sin&#40;vfpu_regs&#91;%vfpu_rs&#93;&#41;

?

All other insns are added to SVN.
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

heh my bad, copy n paste can be evil sometimes =)


got one more, and I'm done for today. Will resume more ops tomorrow.

Code: Select all

/*
+------------------------+------------------+----+--------+---+--------------+
|31                   21 | 20            16 | 15 | 14   8 | 7 | 6         0  |
+------------------------+------------------+----+--------+---+--------------+
| opcode 0xd06 &#40;s&#41;       | constant &#40;0-31&#41;  |  0 |   0    | 0 | vfpu_rd&#91;6-0&#93; |
| opcode 0xd06 &#40;p&#41;       | constant &#40;0-31&#41;  |  0 |   0    | 1 | vfpu_rd&#91;6-0&#93; |
| opcode 0xd06 &#40;t&#41;       | constant &#40;0-31&#41;  |  1 |   0    | 0 | vfpu_rd&#91;6-0&#93; |
| opcode 0xd06 &#40;q&#41;       | constant &#40;0-31&#41;  |  1 |   0    | 1 | vfpu_rd&#91;6-0&#93; |
+------------------------+------------------+----+--------+---+--------------+

	StoreConstant.Single/Pair/Triple/Quad

	vcst.s %vfpu_rd, %a ; store constant into single
	vcst.p %vfpu_rd, %a ; store constant into pair
	vcst.t %vfpu_rd, %a ; store constant into triple
	vcst.q %vfpu_rd, %a ; store constant into quad

		%vfpu_rd&#58;	VFPU Vector Destination Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
		%a&#58;			VFPU Constant ID    Value
					================    ==========================================
					0  = n/a            0
					1  = HUGE           340282346638528859811704183484516925440.0
					2  = SQRT&#40;2&#41;        1.41421
					3  = 1/SQRT&#40;2&#41;      0.70711
					4  = 2/SQRT&#40;PI&#41;     1.12838
					5  = 2/PI           0.63662
					6  = 1/PI           0.31831
					7  = PI/4           0.78540
					8  = PI/2           1.57080
					9  = PI             3.14159
					10 = E              2.71828
					11 = LOG2E          1.44270
					12 = LOG10E         0.43429
					13 = LN2            0.69315
					14 = LN10           2.30259
					15 = 2*PI           6.28319
					16 = PI/6           0.52360
					17 = LOG10TWO       0.30103
					18 = LOG2TEN        3.32193
					19 = SQRT&#40;3&#41;/2      0.86603
					20-31 = n/a         0

	vfpu_regs&#91;%vfpu_rd&#93; <- constants&#91;%a&#93;
*/


/* Instruction-word encoders for vcst.{s,p,t,q}: opcode constant | (constant index a << 16) | destination. */
#define vcst_s(vfpu_rd, a) (0xd0600000 | ((a) << 16) | (vfpu_rd))
#define vcst_p(vfpu_rd, a) (0xd0600080 | ((a) << 16) | (vfpu_rd))
#define vcst_t(vfpu_rd, a) (0xd0608000 | ((a) << 16) | (vfpu_rd))
#define vcst_q(vfpu_rd, a) (0xd0608080 | ((a) << 16) | (vfpu_rd))

holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

cool, added. constant n/a-zero seems to be Zero...

should we define symbolic defines for these values, we could use the same names as in binutils/opcodes/mips-dis.c ? maybe helpful, I suppose here is not much subject to change...
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

We can use the ones in mips-dis.c; I only changed them here because it was not clear whether some values were being divided, multiplied, etc. Something like VFPU_SQRT3_2 doesn't tell me it's sqrt(3)/2; it could be interpreted in many ways =)

I'm also thinking this could use a manual, something like the MIPS architecture manuals. Anyone feel up to the challenge? Now's a good time to get started on that task =)
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

mmhhh... we can rename them to make them more intuitive, but this would make transition between gas/objdump'd code and macro-generated code harder.

Poll: What would you prefer? (I'd go for the mips-dis.c names and add "VFPU_NULL" at index [0]... and you?)

about the manual: I like the idea... but have other priorities right now; for coding the include file is just fine, but as reference a well-formatted PDF including some prosa explaining the concepts and giving some help to beginners would definitely be cool!
mrbrown
Site Admin
Posts: 1537
Joined: Sat Jan 17, 2004 11:24 am

Post by mrbrown »

There is no constant at index 0. Instead use vzero.x (and vone.x for 1). My vote is for using the mips-dis.c names (especially since the assembler already supports these).

stefan needs to bust his hump and get the register operand support working already :).
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

well... the VFPU loads a zero, so we're only documenting it's behaviour - and redundant opcodes are more likely than an engineer not assigning the first constant in an array to some defined value - ;)
I added the defines named as in mips-dis.c.
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

holger: I modded your vfpu test code some to include usb and loadexec restart. Makes the compile/load/run cycle a bit faster =)

http://bradburn.net/mr.mr/files/newvfpuc.zip
mrbrown
Site Admin
Posts: 1537
Joined: Sat Jan 17, 2004 11:24 am

Post by mrbrown »

holger wrote:well... the VFPU loads a zero, so we're only documenting it's behaviour - and redundant opcodes are more likely than an engineer not assigning the first constant in an array to some defined value - ;)
I added the defines named as in mips-dis.c.
If it's "n/a" then I don't see how you can rely on it. There could be a newer revision of VFPU where that "undocumented" constant no longer resolves to 0. You don't gain anything over just using vzero.
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

mrbrown wrote:
holger wrote:well... the VFPU loads a zero, so we're only documenting it's behaviour - and redundant opcodes are more likely than an engineer not assigning the first constant in an array to some defined value - ;)
I added the defines named as in mips-dis.c.
If it's "n/a" then I don't see how you can rely on it. There could be a newer revision of VFPU where that "undocumented" constant no longer resolves to 0. You don't gain anything over just using vzero.
where does the n/a comes from?
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

MrMr[iCE] wrote:holger: I modded your vfpu test code some to include usb and loadexec restart. Makes the compile/load/run cycle a bit faster =)

http://bradburn.net/mr.mr/files/newvfpuc.zip
cool idea!
User avatar
groepaz
Posts: 305
Joined: Thu Sep 01, 2005 7:44 am
Contact:

Post by groepaz »

I'm also thinking this could use a manual. something like the mips architecture manuals. Anyone feel up to the challenge? Now''s a good time to get started on that task =)
catch me on irc and i'll send you a preview of the pdf i'm working on....i didnt add any vfpu stuff yet (there wasnt anything to add until some days ago :=P) but it'll be the next thing i'll do i guess :)
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

groepaz wrote:
I'm also thinking this could use a manual. something like the mips architecture manuals. Anyone feel up to the challenge? Now''s a good time to get started on that task =)
catch me on irc and i'll send you a preview of the pdf i'm working on....i didnt add any vfpu stuff yet (there wasnt anything to add until some days ago :=P) but it'll be the next thing i'll do i guess :)
can you please send it by pm? or even better, post here, I'm quite sure there are other ones interested in a review, too? don't have an irc client installed...
mrbrown
Site Admin
Posts: 1537
Joined: Sat Jan 17, 2004 11:24 am

Post by mrbrown »

holger wrote:where does the n/a comes from?

Code: Select all

      %vfpu_rd&#58;   VFPU Vector Destination Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
      %a&#58;         VFPU Constant ID    Value
               ================    ==========================================
               0  = n/a            0
My point was that just because constant index 0 resolves to 0 today doesn't mean it always will. vzero is the more obvious (and readable) way to store 0 into a register.

Besides the assembler will reject any made up name not in the list seen in mips-dis.c :).
MrMr[iCE]
Posts: 43
Joined: Mon Oct 03, 2005 4:55 pm

Post by MrMr[iCE] »

I typed n/a because in mips_dis.c, there is a list of strings for the VFPU constants, and nothing is defined for index 0, all thats there is ""

BTW the precision of the values I entered for vcst are not the full number (except for VFPU_HUGE). the test app I'm using limits float output to 5 decimals.

holger: more opcodes for ya

Code: Select all

/*
+----------------------+--------------+----+--------------+---+--------------+
|31                 23 | 22        16 | 15 | 14         8 | 7 | 6         0  |
+----------------------+--------------+----+--------------+---+--------------+
|  opcode 0x60000000   | vfpu_rt&#91;6-0&#93; |    | vfpu_rs&#91;6-0&#93; |   | vfpu_rd&#91;6-0&#93; |
+----------------------+--------------+----+--------------+---+--------------+

	VectorAdd.Single/Pair/Triple/Quad

    vadd.s %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Add Single
    vadd.p %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Add Pair
    vadd.t %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Add Triple
    vadd.q %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Add Quad

        %vfpu_rt&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rs&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rd&#58;	VFPU Vector Destination Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

    vfpu_regs&#91;%vfpu_rd&#93; <- vfpu_regs&#91;%vfpu_rs&#93; + vfpu_regs&#91;%vfpu_rt&#93;
*/

/* Instruction-word encoders for vadd.{s,p,t,q}: opcode constant | (rt << 16) | (rs << 8) | rd. */
#define vadd_s(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60000000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vadd_p(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60000080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vadd_t(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60008000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vadd_q(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60008080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+----------------------+--------------+----+--------------+---+--------------+
|31                 23 | 22        16 | 15 | 14         8 | 7 | 6         0  |
+----------------------+--------------+----+--------------+---+--------------+
|  opcode 0x608 &#40;s&#41;    | vfpu_rt&#91;6-0&#93; | 0  | vfpu_rs&#91;6-0&#93; | 0 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x608 &#40;p&#41;    | vfpu_rt&#91;6-0&#93; | 0  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x608 &#40;t&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 0 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x608 &#40;q&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
+----------------------+--------------+----+--------------+---+--------------+

	VectorSub.Single/Pair/Triple/Quad

    vsub.s %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Sub Single
    vsub.p %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Sub Pair
    vsub.t %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Sub Triple
    vsub.q %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Sub Quad

        %vfpu_rt&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rs&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rd&#58;	VFPU Vector Destination Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

    vfpu_regs&#91;%vfpu_rd&#93; <- vfpu_regs&#91;%vfpu_rs&#93; - vfpu_regs&#91;%vfpu_rt&#93;
*/

/* Instruction-word encoders for vsub.{s,p,t,q}: opcode constant | (rt << 16) | (rs << 8) | rd. */
#define vsub_s(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60800000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsub_p(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60800080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsub_t(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60808000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vsub_q(vfpu_rd, vfpu_rs, vfpu_rt)  (0x60808080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+----------------------+--------------+----+--------------+---+--------------+
|31                 23 | 22        16 | 15 | 14         8 | 7 | 6         0  |
+----------------------+--------------+----+--------------+---+--------------+
|  opcode 0x638 &#40;s&#41;    | vfpu_rt&#91;6-0&#93; | 0  | vfpu_rs&#91;6-0&#93; | 0 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x638 &#40;p&#41;    | vfpu_rt&#91;6-0&#93; | 0  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x638 &#40;t&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 0 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x638 &#40;q&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
+----------------------+--------------+----+--------------+---+--------------+

	VectorDiv.Single/Pair/Triple/Quad

    vdiv.s %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Div Single
    vdiv.p %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Div Pair
    vdiv.t %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Div Triple
    vdiv.q %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Div Quad

        %vfpu_rt&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rs&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rd&#58;	VFPU Vector Destination Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

    vfpu_regs&#91;%vfpu_rd&#93; <- vfpu_regs&#91;%vfpu_rs&#93; / vfpu_regs&#91;%vfpu_rt&#93;
*/

/* Instruction-word encoders for vdiv.{s,p,t,q}: opcode constant | (rt << 16) | (rs << 8) | rd. */
#define vdiv_s(vfpu_rd, vfpu_rs, vfpu_rt)  (0x63800000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vdiv_p(vfpu_rd, vfpu_rs, vfpu_rt)  (0x63800080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vdiv_t(vfpu_rd, vfpu_rs, vfpu_rt)  (0x63808000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vdiv_q(vfpu_rd, vfpu_rs, vfpu_rt)  (0x63808080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+----------------------+--------------+----+--------------+---+--------------+
|31                 23 | 22        16 | 15 | 14         8 | 7 | 6         0  |
+----------------------+--------------+----+--------------+---+--------------+
|  opcode 0x640 &#40;s&#41;    | vfpu_rt&#91;6-0&#93; | 0  | vfpu_rs&#91;6-0&#93; | 0 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x640 &#40;p&#41;    | vfpu_rt&#91;6-0&#93; | 0  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x640 &#40;t&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 0 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x640 &#40;q&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
+----------------------+--------------+----+--------------+---+--------------+

	VectorMul.Single/Pair/Triple/Quad

    vmul.s %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Mul Single
    vmul.p %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Mul Pair
    vmul.t %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Mul Triple
    vmul.q %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Mul Quad

        %vfpu_rt&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rs&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rd&#58;	VFPU Vector Destination Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

    vfpu_regs&#91;%vfpu_rd&#93; <- vfpu_regs&#91;%vfpu_rs&#93; * vfpu_regs&#91;%vfpu_rt&#93;
*/

/* Instruction-word encoders for vmul.{s,p,t,q}: opcode constant | (rt << 16) | (rs << 8) | rd. */
#define vmul_s(vfpu_rd, vfpu_rs, vfpu_rt)  (0x64000000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vmul_p(vfpu_rd, vfpu_rs, vfpu_rt)  (0x64000080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vmul_t(vfpu_rd, vfpu_rs, vfpu_rt)  (0x64008000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vmul_q(vfpu_rd, vfpu_rs, vfpu_rt)  (0x64008080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+----------------------+--------------+----+--------------+---+--------------+
|31                 23 | 22        16 | 15 | 14         8 | 7 | 6         0  |
+----------------------+--------------+----+--------------+---+--------------+
|  opcode 0x648 &#40;p&#41;    | vfpu_rt&#91;6-0&#93; | 0  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x648 &#40;t&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 0 | vfpu_rd&#91;6-0&#93; |
|  opcode 0x648 &#40;q&#41;    | vfpu_rt&#91;6-0&#93; | 1  | vfpu_rs&#91;6-0&#93; | 1 | vfpu_rd&#91;6-0&#93; |
+----------------------+--------------+----+--------------+---+--------------+

	VectorDotProduct.Pair/Triple/Quad

    vdot.p %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Dot Product Pair
    vdot.t %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Dot Product Triple
    vdot.q %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Dot Product Quad

        %vfpu_rt&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rs&#58;	VFPU Vector Source Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;
        %vfpu_rd&#58;	VFPU Vector Destination Register &#40;&#91;s|p|t|q&#93;reg 0..127&#41;

    vfpu_regs&#91;%vfpu_rd&#93; <- dotproduct&#40;vfpu_regs&#91;%vfpu_rs&#93;, vfpu_regs&#91;%vfpu_rt&#93;&#41;
*/

/* Instruction-word encoders for vdot.{p,t,q}: opcode constant | (rt << 16) | (rs << 8) | rd. */
#define vdot_p(vfpu_rd, vfpu_rs, vfpu_rt)  (0x64800080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vdot_t(vfpu_rd, vfpu_rs, vfpu_rt)  (0x64808000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vdot_q(vfpu_rd, vfpu_rs, vfpu_rt)  (0x64808080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))


/*
+----------------------+--------------+----+--------------+---+--------------+
|31                 23 | 22        16 | 15 | 14         8 | 7 | 6         0  |
+----------------------+--------------+----+--------------+---+--------------+
|  opcode 0x660 (p)    | vfpu_rt[6-0] | 0  | vfpu_rs[6-0] | 1 | vfpu_rd[6-0] |
|  opcode 0x660 (t)    | vfpu_rt[6-0] | 1  | vfpu_rs[6-0] | 0 | vfpu_rd[6-0] |
|  opcode 0x660 (q)    | vfpu_rt[6-0] | 1  | vfpu_rs[6-0] | 1 | vfpu_rd[6-0] |
+----------------------+--------------+----+--------------+---+--------------+

    Note: "opcode 0x660" is the upper 12 bits (31..20) of the base word
    0x66000000, i.e. it is written with the overlapping vfpu_rt bits zero.

	VectorHomogenousDotProduct.Pair/Triple/Quad

    vhdp.p %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Homogenous Dot Product Pair
    vhdp.t %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Homogenous Dot Product Triple
    vhdp.q %vfpu_rd, %vfpu_rs, %vfpu_rt   ; Homogenous Dot Product Quad

        %vfpu_rt:	VFPU Vector Source Register ([s|p|t|q]reg 0..127)
        %vfpu_rs:	VFPU Vector Source Register ([s|p|t|q]reg 0..127)
        %vfpu_rd:	VFPU Vector Destination Register ([s|p|t|q]reg 0..127)

    vfpu_regs[%vfpu_rd] <- homogenousdotproduct(vfpu_regs[%vfpu_rs], vfpu_regs[%vfpu_rt])

    The macros below do not mask their arguments; pass register numbers
    in the documented 0..127 range.
*/

#define vhdp_p(vfpu_rd,vfpu_rs,vfpu_rt)  (0x66000080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vhdp_t(vfpu_rd,vfpu_rs,vfpu_rt)  (0x66008000 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))
#define vhdp_q(vfpu_rd,vfpu_rs,vfpu_rt)  (0x66008080 | ((vfpu_rt) << 16) | ((vfpu_rs) << 8) | (vfpu_rd))

/*
+----------------------------------------+----+-----------+---+--------------+
|31                                   16 | 15 | 14     8  | 7 | 6         0  |
+----------------------------------------+----+-----------+---+--------------+
| opcode 0xd003 (p)                      |  0 |      0    | 1 | vfpu_rd[6-0] |
| opcode 0xd003 (t)                      |  1 |      0    | 0 | vfpu_rd[6-0] |
| opcode 0xd003 (q)                      |  1 |      0    | 1 | vfpu_rd[6-0] |
+----------------------------------------+----+-----------+---+--------------+
	
	VectorLoadIdentity.Pair/Triple/Quad

    vidt.p %vfpu_rd	; Set 2x1 Vector to Identity
    vidt.t %vfpu_rd	; Set 3x1 Vector to Identity
    vidt.q %vfpu_rd	; Set 4x1 Vector to Identity

        %vfpu_rd:	VFPU Vector Destination Register ([s|p|t|q]reg 0..127)

    vfpu_regs[%vfpu_rd] <- identity vector

    The macros below OR the destination register number into bits 6..0 of
    the base word without masking; pass a value in the documented 0..127
    range.
*/
#define vidt_p(vfpu_rd)  (0xd0030080 | (vfpu_rd))
#define vidt_t(vfpu_rd)  (0xd0038000 | (vfpu_rd))
#define vidt_q(vfpu_rd)  (0xd0038080 | (vfpu_rd))
User avatar
groepaz
Posts: 305
Joined: Thu Sep 01, 2005 7:44 am
Contact:

Post by groepaz »

can you please send it by pm? or even better, post here, I'm quite sure there are other ones interested in a review, too? don't have an irc client installed...
i will do, if anyone else wants it, leave me a pm.

i don't want to post the url to it publicly yet, since it's still *very* much a work in progress, contains outdated information in certain areas, and shouldn't be used by anyone who doesn't know exactly how to use this kind of "guessed" information. oh well :) it's the next thing i'll work on after cleaning up and releasing the pspinside source.
holger
Posts: 204
Joined: Thu Aug 18, 2005 10:57 am

Post by holger »

mrbrown wrote:
holger wrote:where does the n/a come from?

Code: Select all

      %vfpu_rd:   VFPU Vector Destination Register ([s|p|t|q]reg 0..127)
      %a:         VFPU Constant ID    Value
               ================    ==========================================
               0  = n/a            0
My point was that just because constant index 0 resolves to 0 today doesn't mean it always will. vzero is the more obvious (and readable) way to store 0 into a register.

Besides the assembler will reject any made up name not in the list seen in mips-dis.c :).
well... we're the ones building the assembler, aren't we? nevertheless... since you all seem to have serious doubts that the zero=0 guess is true, maybe we should rather remove this define...
Post Reply