The Acorn Computer User WWW Server

SWI veneer substitute for _kernel_swi

From: enevill@acorn.co.uk (Edward Nevill)
Subject: New SWI veneers
Date: 4 Oct 91 17:11:01 GMT
Organization: Acorn Computers Ltd, Cambridge, England

Here is an optimised SWI veneer which can be used instead of _kernel_swi.
It is significantly faster than _kernel_swi and is much easier to use (IMHO).

Enjoy,
Edward.
--- swiv.h ---
/* SWI veneers:
 *  Written by Edward Nevill and Jonathan Roach in an idle moment between projects.
 */

/* Opaque error type: this header only ever hands out pointers to it and never
 * defines the structure's contents.
 */
typedef struct Error Error;

/* Generic SWI interface
 *  swi(swino,reg_mask,regs...)
 *   swino = SWI number to call as defined in h.swis, X bit set if you wish the
 *           X form of the SWI to be called, clear if you want the non X form.
 *   reg_mask = mask of in / out registers
 *              bits 0-9:   Bit N set => Register N specified on input
 *                          Bit N clear => Register N unspecified on input
 *              bits 22-31: Bit N set => Register 31-N on output stored
 *                              in address specified in varargs list
 *                              (bit 31 = R0 ... bit 22 = R9; build this
 *                              part of the mask with the OUT() macro).
 *   ...        In order, input registers followed by output registers,
 *                      starting at r0 and going up.
 *   returns 0 or errorblock pointer if X-bit set
 *   returns r0 if X-bit clear
 *  swix(swino,reg_mask,regs...)
 *   This behaves identically to 'swi' except that it always calls the X form.
 *
 * Eg:
 *   swi(OS_SWINumberToString, IN(R0|R1|R2), n, buf, 255);
 *   e = swi(XOS_SWINumberFromString, IN(R1)|OUT(R0), str, &n);
 *       - Error block pointer (or 0) is returned so must get returned R0
 *       - via argument list.
 *   e = swix(OS_SWINumberFromString, IN(R1)|OUT(R0), str, &n);
 *       - As above but uses the swix function rather than setting the X bit
 *         explicitly (saves one instruction on SWI call).
 *   e = swi(OS_File, IN(R0|R1|R2|R3)|OUT(R4), 255, name, buff, 0, &len);
 *       - We don't care about the load, exec or attrs so don't specify
 *         them in the output registers.
 *       - NOTE(review): if OS_File here is the non X form, then by the rules
 *         above 'e' receives R0 rather than an error pointer; confirm
 *         whether swix (or XOS_File) was intended.
 */

/* Call the X form of 'swino'; returns 0 on success or the error block pointer. */
extern Error *swix(int swino, int reg_mask, ...);
/* Call 'swino' exactly as given; see the return rules in the comment above. */
extern int swi(int swino, int reg_mask, ...);

/* Register mask macros
 *  The bits in the register mask are arranged as follows:
 *  31 30 29 ... 22 ...  9 ...  2  1  0
 *  O0 O1 O2 ... O9     I9 ... I2 I1 I0  I(N) = bit set if R(N) used on entry
 *                                       O(N) = bit set if R(N) written on exit
 *  The bits are arranged like this to optimise the case where a SWI is being
 *  called with a small number of input and output registers. For example, a SWI
 *  call which uses R0-R5 on entry and R0-R1 on exit will have a register mask
 *  of 0xC000003f which can be loaded into an ARM register in one instruction
 *  (the compiler performs this optimisation, even when the constant wraps
 *  around between bits 0 and 31). Using the more obvious coding of I0-I9 in bits
 *  0 - 9 and O0-O9 in bits 16-25 leads to a constant of 0x0003003f which requires
 *  two instructions.
 *
 *  IN(m)  - m is an OR of register names used on entry; passed through as is.
 *  OUT(m) - m is an OR of register names written on exit; bit N of m is moved
 *           to bit 31-N of the result.
 *
 *  Every use of 'm' inside OUT is bracketed. Without the brackets an argument
 *  such as R0|R1 is split by operator precedence (m&1 becomes R0|(R1&1)) and
 *  the resulting mask selects the wrong output registers.
 */
#define IN(m) (m)
#define OUT(m) ((unsigned)((m)&1)<<31|((m)&2)<<29|((m)&4)<<27|\
                ((m)&8)<<25|((m)&16)<<23|((m)&32)<<21|((m)&64)<<19|\
                ((m)&128)<<17|((m)&256)<<15|((m)&512)<<13)

/* The register names
 *  Change these to use different names if you use R0 - R9 elsewhere in your program
 */
/* One bit per register, R(N) == 1 << N, for use as arguments to IN() and OUT(). */
#define R0 (1 << 0)
#define R1 (1 << 1)
#define R2 (1 << 2)
#define R3 (1 << 3)
#define R4 (1 << 4)
#define R5 (1 << 5)
#define R6 (1 << 6)
#define R7 (1 << 7)
#define R8 (1 << 8)
#define R9 (1 << 9)
--- swiv.s ---
; Register name bindings: give the symbolic names r0-r12, sp, lr and pc
; to register numbers 0-15 so the code in this file can use them.
r0              RN      0
r1              RN      1
r2              RN      2
r3              RN      3
r4              RN      4
r5              RN      5
r6              RN      6
r7              RN      7
r8              RN      8
r9              RN      9
r10             RN      10
r11             RN      11
r12             RN      12
sp              RN      13
lr              RN      14
pc              RN      15


                AREA    |C$$code|, CODE, READONLY

; Template instruction. It is never executed in place: 'swi' loads this word
; as data and stores it as the last instruction of the routine it builds on
; the stack, where it jumps to the return address held at [sp].
SWIReturnInst   LDR     pc, [sp, #0*4]

; swix(swino, reg_mask, ...)
; Sets the X bit (&20000) in the SWI number and falls through into 'swi'.
        EXPORT  swix
swix    ROUT
        ORR     r0, r0, #&20000

; swi(swino, reg_mask, ...)
; On entry r0 = SWI number, r1 = register mask, r2/r3 = first two variadic
; arguments. Builds a three-instruction routine on the stack, runs it to
; perform the SWI, then stores the requested output registers through the
; pointers that follow the input values in the argument list.
        EXPORT  swi
swi     ROUT

; Construct a stack frame that looks something like this (lowest address first):
;       returnval                       (address of SWIReturn)
;       LDMIA   r12!, {r0..rn}          (or a NOP if there are no input regs)
;       SWI     xxxxxx
;       LDR     pc, [sp]
;       saved r4-r9,r11,lr
;       saved r1                        (the register mask)
;       saved input values (r2...rn)    (r2 and r3 are pushed here; any further
;                                        arguments are presumably already on the
;                                        caller's stack directly above)
;
; NOTE(review): this executes instructions that have just been written to the
; stack as data; confirm this is acceptable on the target processor.

        STMFD   sp!, {r1-r3}            ; Save r1 and put 1st two variadic args on stack
        BIC     r2, r0, #&ff000000
        ORR     r2, r2, #&ef000000      ; Construct SWI instruction
        ADR     r0, SWIReturn           ; Return address for the routine on the stack
        BIC     r1, r1, #&ff000000      ; Construct LDMIA R12!, {regs} instruction, if
        BICS    r1, r1, #&00ff0000      ; {regs} = {} (IE no input regs) we must not
        ORRNE   r1, r1, #&e8000000      ; use an LDMIA R12!, {} instruction as this is an
        ORRNE   r1, r1, #&00bc0000      ; invalid instruction, we use a suitable NOP instead
        MOVEQ   r1, #0                  ; 0 = opcode for ANDEQ r0, r0, r0 (a suitable NOP)
        LDR     r3, SWIReturnInst       ; Fetch the 'LDR pc, [sp]' template as data
        STMFD   sp!, {r0-r9,r11,lr}     ; Save regs and set up SWI call routine (in R0-R3)
        ADD     r12, sp, #(12+1)*4      ; Point R12 at input regs on stack.
        ADD     pc, sp, #4              ; Call routine on stack
SWIReturn
; The routine on the stack returns here. R12 is read below as a pointer to the
; output addresses: each requested register is stored through the next one.
        LDR     lr, [sp, #(12+0)*4]     ; Fetch reg mask again
        MOVS    lr, lr, ASL #1          ; Shift out setting C if R0 to be written, N
        LDRCS   r11, [r12], #4          ; if R1 to be written.
        STRCS   r0, [r11]
        LDRMI   r11, [r12], #4
        STRMI   r1, [r11]
        MOVS    lr, lr, ASL #2          ; Shift 2 bits each time for the next 2 regs
        LDRCS   r11, [r12], #4
        STRCS   r2, [r11]
        LDRMI   r11, [r12], #4
        STRMI   r3, [r11]
        MOVS    lr, lr, ASL #2
        LDRCS   r11, [r12], #4
        STRCS   r4, [r11]
        LDRMI   r11, [r12], #4
        STRMI   r5, [r11]
        MOVS    lr, lr, ASL #2
        LDRCS   r11, [r12], #4
        STRCS   r6, [r11]
        LDRMI   r11, [r12], #4
        STRMI   r7, [r11]
        MOVS    lr, lr, ASL #2
        LDRCS   r11, [r12], #4
        STRCS   r8, [r11]
        LDRMI   r11, [r12], #4
        STRMI   r9, [r11]
        LDR     r1, [sp, #2*4]          ; Fetch the constructed SWI instruction
        TST     r1, #&20000             ; EQ if X-bit clear
        CMPEQ   pc, #&80000000          ; SET V flag if so, so R0 not cleared
        MOVVC   r0, #0                  ; Clear R0 if V clear (X form and no error)
        ADD     sp, sp, #4*4            ; Drop SWI call routine
        LDMIA   sp!, {r4-r9,r11,lr}     ; Restore the registers saved on entry
        ADD     sp, sp, #3*4            ; Drop saved R1 and 1st two variadic args.
        MOVS    pc, lr                  ; Return to caller

        END
--- switime.c ---
/* This program times the 'swi' and '_kernel_swi' veneers. It calls the X form of
 * OS_GenerateError (the fastest possible swi since all it does is set the V flag).
 * It calls it with a sample register set which would be used to call XOS_ReadVarVal.
 *
 * IE. It sets up the registers as though it were calling XOS_ReadVarVal and then
 * calls a trivial SWI (XOS_GenerateError) so that it only measures the overhead of
 * calling a typical SWI.
 *
 * Timings for an A540 in mode 0
 *                  swi      _kernel_swi
 *  Cache enabled   168 csec 252 csec
 *  Cache disabled  340 csec 497 csec
 *
 * It also serves as an example of how much easier the 'swi' veneer is to use. Note the
 * 6 or 7 lines required using '_kernel_swi' compared with the one line using 'swi'.
 */
#include <stdio.h>

#include "kernel.h"
#include "swiv.h"
#include "swis.h"

int main(void)
{
    _kernel_swi_regs r;
    int t1, t2, t3;
    int i;
    int exists;

    t1 = swi(OS_ReadMonotonicTime, 0);
    for (i = 0; i < 100000; i++)
        swix(OS_GenerateError, IN(R0|R2|R3|R4)|OUT(R2), "ADFSFiler$Path", -1, 0, 0, &exists);
    t2 = swi(OS_ReadMonotonicTime, 0);
    for (i = 0; i < 100000; i++) {
        r.r[0] = (int)"ADFSFiler$Path";
        r.r[2] = -1;
        r.r[3] = 0;
        r.r[4] = 0;
        _kernel_swi(OS_GenerateError, &r, &r);
        exists = r.r[2];
    }
    t3 = swi(OS_ReadMonotonicTime, 0);
    printf("Times:-\nswix = %d csec\n_kernel_swi = %d csec\n", t2-t1, t3-t2);
    return 0;
}
---


From: enevill@acorn.co.uk (Edward Nevill)
Subject: New SWI veneers (2)
Date: 21 Oct 91 16:01:22 GMT
Organization: Acorn Computers Ltd, Cambridge, England

A couple of weeks ago I posted a set of variadic swi wrapper
routines; unfortunately, they contained a bug.

The macro OUT defined in h.swiv currently reads

#define OUT(m) ((unsigned)(m&1)<<31|(m&2)<<29|(m&4)<<27|\
                (m&8)<<25|(m&16)<<23|(m&32)<<21|(m&64)<<19|\
                (m&128)<<17|(m&256)<<15|(m&512)<<13)

it should read

#define OUT(m) ((unsigned)((m)&1)<<31|((m)&2)<<29|((m)&4)<<27|\
                ((m)&8)<<25|((m)&16)<<23|((m)&32)<<21|((m)&64)<<19|\
                ((m)&128)<<17|((m)&256)<<15|((m)&512)<<13)

IE. it should have a pair of brackets around each 'm' in the
macro expansion.

Given that a typical use of the macro is something like OUT(R0|R1)
this can lead to fatal results (eg address exceptions, stack
trampling) so you should change your headers to use the new macros.

Edward.
poppy@poppyfields.net