Tuesday, November 18, 2008

On filesystems

Some PalmOS things will not work on DGOS. Specifically, I am talking about SlotDrivers and FsLibs. Why? Because:
  • In DGOS, for efficiency reasons, the driver model is different from PalmOS
  • Drivers are in kernel space in DGOS, so they can make very quick system calls, and admitting any PalmOS code into kernel space is a bad idea because it was never written to be even thread safe
  • Porting an existing driver to DGOS is not hard


Filesystems are created as a kernel module, and register themselves with the DGOS kernel using the vfsRegisterFs() function, which takes, as a parameter, a pointer to a VfsFS structure. This structure is a table of function pointers and an FS name.

The module provides this, and from then on, until it is unregistered using vfsUnregisterFs(), it can be used to mount and format filesystems.

Here it is in detail:



//File open mode flags (distinct bits, so they can be OR-ed together);
//presumably what is passed as the "mode" argument of VfsFileOpenFn
#define VFS_OPEN_MODE_READ 1
#define VFS_OPEN_MODE_RW 2
#define VFS_OPEN_MODE_EXCLUSIVE 4

//Seek origins; presumably what is passed as the "whence" argument of VfsFileSeekFn
#define VFS_SEEK_SET 0
#define VFS_SEEK_CUR 1
#define VFS_SEEK_END 2

//File attribute bits; presumably the "attr" value of VfsFileGetAttrFn / VfsFileSetAttrFn
#define FILE_ATTR_HIDDEN 0x00000001
#define FILE_ATTR_SYSTEM 0x00000002
#define FILE_ATTR_READONLY 0x00000004
#define FILE_ATTR_ARCHIVE 0x00000008

//Capacity, in wchar_t units, of the fsName field of VfsFS
#define VFS_FS_NAME_SZ 16

//Function-pointer types that make up the VfsFS driver table.
//Every callback returns an Err and, except for VfsVolInitFn, receives the opaque
//per-volume "fsData" pointer that VfsVolInitFn hands back through its out-parameter.
//NOTE(review): VfsFsInfo and VfsDirEntry are not declared in this listing.

//per-volume state setup / teardown
typedef Err (*VfsVolInitFn) (void** fsData);
typedef Err (*VfsVolDeinitFn) (void* fsData);

//whole-filesystem operations; blkDev identifies the block device to use
typedef Err (*VfsFsMountFn) (void* fsData,UInt32 blkDev);
typedef Err (*VfsFsUnmountFn) (void* fsData);
typedef Err (*VfsFsSizesFn) (void* fsData,UInt64* free,UInt64* used,UInt64* total); //in bytes
typedef Err (*VfsFsInfoFn) (void* fsData,VfsFsInfo* info);
//NOTE(review): the next three types lack the "Fn" suffix used by every other type here
typedef Err (*VfsFsCreate) (void* fsData,UInt32 blkDev);
typedef Err (*VfsFsGetLabel) (void* fsData,wchar_t* label,UInt32* size /* in / out */);
typedef Err (*VfsFsSetLabel) (void* fsData,const wchar_t* label);

//operations addressed by path (file does not need to be open)
typedef Err (*VfsFileCreateFn) (void* fsData,const wchar_t* path);
typedef Err (*VfsFileDeleteFn) (void* fsData,const wchar_t* path);
typedef Err (*VfsFileRenameFn) (void* fsData,const wchar_t* oldPath,const wchar_t* newPath); //MUST handle moving files too, not just renaming

//open-file operations; openFileData is the opaque handle produced by VfsFileOpenFn
typedef Err (*VfsFileOpenFn) (void* fsData,const wchar_t* path,UInt32 mode,void** openFileData);
typedef Err (*VfsFileCloseFn) (void* fsData,void* openFileData);
typedef Err (*VfsFileReadFn) (void* fsData,void* openFileData,void* buf,UInt32 bytes,UInt32* done);
typedef Err (*VfsFileWriteFn) (void* fsData,void* openFileData,const void* buf,UInt32 bytes,UInt32* done);
typedef Err (*VfsFileSeekFn) (void* fsData,void* openFileData,UInt32 whence,Int32 offset);
typedef Err (*VfsFileTellFn) (void* fsData,void* openFileData,UInt32* position);
typedef Err (*VfsFileGetAttrFn) (void* fsData,void* openFileData,UInt32* attr);
typedef Err (*VfsFileSetAttrFn) (void* fsData,void* openFileData,UInt32 attr);
typedef Err (*VfsFileGetDateFn) (void* fsData,void* openFileData,UInt32* create,UInt32* mod,UInt32* access); //null to not get
typedef Err (*VfsFileSetDateFn) (void* fsData,void* openFileData,UInt32* create,UInt32* mod,UInt32* access); //null to not set
typedef Err (*VfsFileGetSizeFn) (void* fsData,void* openFileData,UInt32* sz);
typedef Err (*VfsFileSetSizeFn) (void* fsData,void* openFileData,UInt32 sz);

//directory operations; openDirData is the opaque handle produced by VfsDirOpenFn
typedef Err (*VfsDirCreateFn) (void* fsData,const wchar_t* path);
typedef Err (*VfsDirDeleteFn) (void* fsData,const wchar_t* path);
typedef Err (*VfsDirOpenFn) (void* fsData,const wchar_t* path,void** openDirData);
typedef Err (*VfsDirCloseFn) (void* fsData,void* openDirData);
typedef Err (*VfsDirEnumeFn) (void* fsData,void* openDirData,UInt32* iterator,VfsDirEntry* entry);

//Filesystem driver descriptor: a filesystem name plus the table of callbacks
//that implement it. This is the structure a filesystem module fills in.
typedef struct{

wchar_t fsName[VFS_FS_NAME_SZ];

//vol ops
VfsVolInitFn volInit;
VfsVolDeinitFn volDeinit;

//FS ops
VfsFsMountFn fsMount;
VfsFsUnmountFn fsUnmount;
VfsFsSizesFn fsSizes; //out-parameters are ordered free,used,total (see VfsFsSizesFn); values in bytes
VfsFsInfoFn fsInfo;
VfsFsCreate fsCreate; //format a disk
VfsFsGetLabel fsGetLabel;
VfsFsSetLabel fsSetLabel;

//ops on unopened files
VfsFileCreateFn fileCreate;
VfsFileDeleteFn fileDelete;
VfsFileRenameFn fileRename;

//ops on opened files
VfsFileOpenFn fileOpen;
VfsFileCloseFn fileClose;
VfsFileReadFn fileRead;
VfsFileWriteFn fileWrite;
VfsFileSeekFn fileSeek;
VfsFileTellFn fileTell;
VfsFileGetAttrFn fileGetAttr;
VfsFileSetAttrFn fileSetAttr;
VfsFileGetDateFn fileGetDates;
VfsFileSetDateFn fileSetDates;
VfsFileGetSizeFn fileGetSize;
VfsFileSetSizeFn fileSetSize; //resize

//ops on unopened dirs
VfsDirCreateFn dirCreate;
VfsDirDeleteFn dirDelete;

//ops on open dirs
VfsDirOpenFn dirOpen;
VfsDirCloseFn dirClose;
VfsDirEnumeFn dirEnumerate;

}VfsFS;

Wednesday, November 12, 2008

On the current bootloader and the boot process

The current bootloader is a hack on top of the PalmPowerups bootloader as used in PowerDrive. Booting while holding down the [home] key enters it, and it waits for a FAT32-formatted card [FAT12 and FAT16 are NOT supported] to be inserted, containing a file called "kernel" in its root folder. The reason for the lack of FAT12 and FAT16 support is that the bootloader is NOT using my FAT driver, but another one, under a BSD license, which only supports FAT32 (and barely so).

After the kernel is loaded from the card into RAM, the bootloader jumps to it.

The kernel copies itself to a known location in ram (the start of physical ram) and jumps to itself there. Once there it enables the MMU, and caches, and sets up the memory map for itself. Kernel space on DGOS begins at 0xE0000000. All lower addresses are available for use by user applications. This will allow a lot of PalmOS apps to run unmodified, that rely on memory addresses like 0x8xxxxxxx and 0xACxxxxxx being available to them.

Once the memory space is all setup, the LCD driver re-initializes the LCD (which has been on this entire time). LCD is initialized very early to enable printing debug messages to it. After that other peripherals are brought up, like LifeDrive's internal HDD.

A few more pieces

The kernel rewrite is going well.

Finished the block device manager and cache.
The RTC driver is working as well (smallest driver in the whole OS)
The FAT32 driver is also shaping up quite well.
The module manager is up and running, and thus all drivers are now separate modules (with the exception of the SDHC and the FAT32 drivers, since they are needed to read the card and thus to load further modules). This requirement will eventually go away, as the bootloader will be modified to load the default block device and default filesystem drivers into RAM by itself.

Sunday, November 9, 2008

Unicode

The system string and locale manager has been rewritten to fully support Unicode. All applications will thus support Unicode by default, without any need for additional code by the programmer. For legacy apps (like PalmOS ones) a default encoding will need to be specified, since they use 8-bit characters.

filesystems

I am now working on implementing FAT12/16/32/exFAT support.

Sunday, November 2, 2008

DGOS blog

This blog will follow the creation of DGOS, my operating system for mobile devices. It is currently being developed on a Palm LifeDrive. I will post interesting pieces of code, ideas, and design decisions here.



Today's post shows how easy it is to write device drivers in DGOS. This is the COMPLETE source code for the hard drive driver in the LD. Note the complete lack of synchronization primitives or anything of the sort; the OS handles that for the driver.

Note that the only entry point into the driver is "hddInit"
Requests and responses are handled by the "requestQ" primitive. It is smart, blocking the driver thread while waiting for requests and blocking requesting threads while waiting for responses. While this driver is single-threaded, multithreaded drivers can use it too, since this primitive is thread-safe.

#include "HDD.h"
#include "task.h"
#include "blockDev.h"
#include "requestQ.h"
#include "kHeap.h"
#include "timers.h"
#include "gpio.h"


//Memory-controller registers, addressed as 32-bit word indexes into a local
//variable that MUST be named "map" at the point of use (see hddPrvIfaceInit,
//where "map" is the mapping of physical address 0x48000000).
#define MCIO0            (map[14])
#define MECR            (map[ 5])

//Per-driver state, allocated once in hddInit and owned by the driver thread.
typedef struct{
    
    UInt32*    hddBase;        //mapped base of the drive's register window (set in hddInit)
    UInt32    blkDevID;        //block device id returned by blkDevCreate
    UInt32    reqQID;            //request queue id returned by blkDevCreate
    
    //identity strings filled in by hddPrvReadDriveID (zero-filled, then byte-swapped copies)
    char    manuf[32];
    char    prod[32];
    char    serial[32];
    
    UInt32    numBlocks;        //block count reported by the drive's identify data
    
}HddGlobals;

//Image of the drive's command registers. The hddPrvCmd* functions write these
//fields to register offsets 0x11..0x17 to start a command, and read the same
//registers back into the structure afterwards - hence the dual-purpose names.
typedef struct{
    
    UInt8 err_or_features;        //addr = 1
    UInt8 numSectors;            //addr = 2
    UInt8 lba_0_7;                //addr = 3
    UInt8 lba_8_15;                //addr = 4
    UInt8 lba_16_23;            //addr = 5
    UInt8 lba_24_31;            //addr = 6
    UInt8 status_or_cmd;        //addr = 7
    
}HddCmd;

//Forward declarations of the per-sector transfer routines; they are implemented in assembly at the end of the file
static void hddPrvReadSec(volatile void* base,UInt8* buf);
static void hddPrvWriteSec(volatile void* base,UInt8* buf);

static Err hddPrvWaitForDriveReady(volatile void* hddBase){
    
    volatile UInt8* statusField = ((volatile UInt8*)hddBase) + 0x17;
    volatile UInt8* errField     = ((volatile UInt8*)hddBase) + 0x11;
    UInt32 t,v;
    
    t = timersGetTicks();
    
    do{
        v = *statusField;
        
        if(v == 0x50){        //ready
                
            return errNone;
        }
        
    }while(timersGetTicks() - t < timersTicksPerMsec() * 500);
    
    return 0xFF00 + *errField;
}

static Err hddPrvWaitForNotBusy(volatile void* hddBase){
    
    volatile UInt8* statusField = ((volatile UInt8*)hddBase) + 0x17;
    volatile UInt8* errField     = ((volatile UInt8*)hddBase) + 0x11;
    UInt32 t,v;
    
    
    t = timersGetTicks();
    
    do{
        v = *statusField;
        
        if(v & 0x80){        //busy
            
            //nothing
        }
        else if(v & 1){        //error
            
            break;
        }
        else{
            
            return errNone;
        }
        
    }while(timersGetTicks() - t < timersTicksPerMsec() * 500);
    
    return 0xFF00 + *errField;
}

static Err hddPrvWaitForReadyForData(volatile void* hddBase){
    
    volatile UInt8* statusField = ((volatile UInt8*)hddBase) + 0x17;
    volatile UInt8* errField     = ((volatile UInt8*)hddBase) + 0x11;
    UInt32 t,v;
    
    
    t = timersGetTicks();
    
    do{
        v = *statusField;
        
        if(!(v & 0x08)){    //not ready for dat
            
            //nothing
        }
        else if(v & 1){        //error
            
            break;
        }
        else{
            
            return errNone;
        }
        
    }while(timersGetTicks() - t < timersTicksPerMsec() * 1000);
    
    return 0xFF00 + *errField;
}

//Issues a command that transfers no sector data.
//Waits for the drive to be ready, writes the seven HddCmd fields to registers
//0x11..0x17 (the command byte last), waits for the drive to stop being busy,
//then reads the same seven registers back into *cmd.
//Returns the result of the first wait if it fails (nothing is written in that
//case), otherwise the result of the second wait; *cmd is refreshed either way.
static Err hddPrvCmd(volatile void* hddBase,HddCmd* cmd){

    //regs[n] is the register the HddCmd field comments call "addr = n"
    volatile UInt8* regs = ((volatile UInt8*)hddBase) + 0x10;
    Err result;

    result = hddPrvWaitForDriveReady(hddBase);
    if(result) return result;

    regs[1] = cmd->err_or_features;
    regs[2] = cmd->numSectors;
    regs[3] = cmd->lba_0_7;
    regs[4] = cmd->lba_8_15;
    regs[5] = cmd->lba_16_23;
    regs[6] = cmd->lba_24_31;
    regs[7] = cmd->status_or_cmd;

    result = hddPrvWaitForNotBusy(hddBase);

    cmd->err_or_features = regs[1];
    cmd->numSectors = regs[2];
    cmd->lba_0_7 = regs[3];
    cmd->lba_8_15 = regs[4];
    cmd->lba_16_23 = regs[5];
    cmd->lba_24_31 = regs[6];
    cmd->status_or_cmd = regs[7];

    return result;
}

//Issues a command that returns numSectors sectors of 512 bytes each into buf.
//Waits for the drive to be ready, writes the seven HddCmd fields to registers
//0x11..0x17 (the command byte last), then for every sector waits for "not busy"
//and "ready for data" before pulling the sector in with hddPrvReadSec.
//The transfer stops at the first failed wait. The registers are read back
//into *cmd in all cases once the command has been written.
//Returns errNone, or the error from whichever wait failed.
static Err hddPrvCmd_read(volatile void* hddBase,HddCmd* cmd,UInt32 numSectors,UInt8* buf){

    //regs[n] is the register the HddCmd field comments call "addr = n"
    volatile UInt8* regs = ((volatile UInt8*)hddBase) + 0x10;
    UInt8* dst = buf;
    UInt32 left;
    Err result;

    result = hddPrvWaitForDriveReady(hddBase);
    if(result) return result;

    regs[1] = cmd->err_or_features;
    regs[2] = cmd->numSectors;
    regs[3] = cmd->lba_0_7;
    regs[4] = cmd->lba_8_15;
    regs[5] = cmd->lba_16_23;
    regs[6] = cmd->lba_24_31;
    regs[7] = cmd->status_or_cmd;

    for(left = numSectors; left != 0; left--){

        result = hddPrvWaitForNotBusy(hddBase);
        if(result) break;

        result = hddPrvWaitForReadyForData(hddBase);
        if(result) break;

        hddPrvReadSec(hddBase,dst);
        dst += 512;
    }

    cmd->err_or_features = regs[1];
    cmd->numSectors = regs[2];
    cmd->lba_0_7 = regs[3];
    cmd->lba_8_15 = regs[4];
    cmd->lba_16_23 = regs[5];
    cmd->lba_24_31 = regs[6];
    cmd->status_or_cmd = regs[7];

    return result;
}

//Issues a command that sends numSectors sectors of 512 bytes each from buf.
//Waits for the drive to be ready, writes the seven HddCmd fields to registers
//0x11..0x17 (the command byte last), then for every sector waits for "not busy"
//and "ready for data" before pushing the sector out with hddPrvWriteSec.
//The transfer stops at the first failed wait.
//Once every sector has been handed over, the drive is waited on one more time
//so that a failure while committing the LAST sector is reported to the caller
//instead of being lost (previously the function returned success as soon as
//the final sector had been pushed into the data register).
//The registers are read back into *cmd in all cases once the command has been written.
//Returns errNone, or the error from whichever wait failed.
static Err hddPrvCmd_write(volatile void* hddBase,HddCmd* cmd,UInt32 numSectors,UInt8* buf){
    
    volatile UInt8* p = hddBase;
    Err e;
    
    e = hddPrvWaitForDriveReady(hddBase);
    if(e) return e;
    
    p[0x11] = cmd->err_or_features;
    p[0x12] = cmd->numSectors;
    p[0x13] = cmd->lba_0_7;
    p[0x14] = cmd->lba_8_15;
    p[0x15] = cmd->lba_16_23;
    p[0x16] = cmd->lba_24_31;
    p[0x17] = cmd->status_or_cmd;
    
    while(numSectors--){
        
        e = hddPrvWaitForNotBusy(hddBase);
        if(e) break;
        e = hddPrvWaitForReadyForData(hddBase);
        if(e) break;
        
        hddPrvWriteSec(hddBase,buf);
        
        buf += 512;
    }
    
    //all data handed over: wait for the drive to finish with it and pick up its final verdict
    if(!e) e = hddPrvWaitForNotBusy(hddBase);
    
    cmd->err_or_features     = p[0x11];
    cmd->numSectors            = p[0x12];
    cmd->lba_0_7            = p[0x13];
    cmd->lba_8_15            = p[0x14];
    cmd->lba_16_23            = p[0x15];
    cmd->lba_24_31            = p[0x16];
    cmd->status_or_cmd        = p[0x17];
    
    return e;
}

//MemMove that also does a byteswap16() on each moved halfword. Useful since device identification strings are byte-swapped in response.
static asm memSwapCopy(void* dstP,void* srcP,UInt32 len){        //only even lengths please
    
    MOV R2,R2,LSR #1
    
loop:
    
    LDRB R3, [R1],#1
    LDRB R12,[R1],#1
    
    STRB R3, [R0,#1]
    STRB R12,[R0],#2
    
    SUBS R2,R2,#1
    BNE loop
    
    BX LR
}

//Sends the "drive identify" command (0xEC) and fills in g->serial, g->manuf,
//g->prod and g->numBlocks from the 512-byte response.
//Returns errNone on success; on failure returns the command's error and leaves *g untouched.
//The strings are zero-filled first and copied with memSwapCopy because the
//drive returns them with the two bytes of every halfword swapped.
//NOTE(review): in the standard ATA identify layout bytes 46..53 are the firmware
//revision rather than a manufacturer name, and the model string at byte 54 is 40
//bytes long (only the first 30 fit here) - confirm the intended field mapping.
Err hddPrvReadDriveID(HddGlobals* g){
    
    HddCmd cmd;
    UInt32 bufP[128];                //UInt32 storage keeps the sector buffer word aligned
    UInt8* buf = (UInt8*)bufP;
    Err e;
    
    //every field of cmd is written to the drive's registers, so none may be left
    //as stack garbage (features, sector count and LBA 0..23 used to be uninitialized)
    MemSet(&cmd,sizeof(cmd),0);
    cmd.lba_24_31 = 0xE0;
    cmd.status_or_cmd = 0xEC;        //drive identify [OPTIONAL cmd]
    
    e = hddPrvCmd_read(g->hddBase,&cmd,1,buf);
    if(e) return e;
    
    MemSet(g->serial,sizeof(g->serial),0);
    memSwapCopy(g->serial,buf + 20,20);
    
    MemSet(g->manuf,sizeof(g->manuf),0);
    memSwapCopy(g->manuf,buf + 46,8);
    
    MemSet(g->prod,sizeof(g->prod),0);
    memSwapCopy(g->prod,buf + 54,30);    //even length as memSwapCopy requires; the old 31 copied these same 30 bytes
    
    g->numBlocks = bufP[30];            //32-bit value at byte offset 120 of the response
    
    return errNone;
}

//Sets GPIO 115 to the requested state; going by the function's name this is
//the line that switches power to the drive.
static void hddPrvDrivePower(Boolean on){

    enum { HDD_POWER_GPIO = 115 };

    gpioSetState(HDD_POWER_GPIO,on);
}

//Pulses GPIO 98: drives it false, sleeps the current task for 2 delay units,
//then drives it true again. Presumably this is the drive's active-low reset
//line - confirm against the board schematic.
static void hddPrvDriveReset(){

    enum { HDD_RESET_GPIO = 98 };

    gpioSetState(HDD_RESET_GPIO,false);
    taskDelay(taskGetCurTask(),2);
    gpioSetState(HDD_RESET_GPIO,true);
}

//Powers the drive up, resets it, gives it time to settle and then reads its
//identification data into *g.
//If identification fails, the failure is logged and the identity fields are
//cleared (empty strings, zero blocks). Previously the error was ignored and the
//fields kept whatever the allocation happened to contain, which the driver
//would then hand out in response to info queries.
static void hddPrvDriveInit(HddGlobals* g){
    
    Err e;
    
    hddPrvDrivePower(true);
    taskDelay(taskGetCurTask(),2);
    hddPrvDriveReset();
    taskDelay(taskGetCurTask(),20);
    
    e = hddPrvReadDriveID(g);
    if(e){
        
        charsPrintF("HDD identify failed, err=0x%08lx\n",(UInt32)e);
        
        MemSet(g->serial,sizeof(g->serial),0);
        MemSet(g->manuf,sizeof(g->manuf),0);
        MemSet(g->prod,sizeof(g->prod),0);
        g->numBlocks = 0;
    }
}

//Reads or writes "num" 512-byte blocks starting at block "first".
//  g        driver globals (supplies the register base)
//  first    first block (LBA) of the transfer
//  num      number of blocks to transfer
//  bufP     word-aligned data buffer of at least num * 512 bytes (panics if misaligned)
//  numDone  if not NULL, receives the number of blocks in fully completed chunks
//  write    true to write (command 0x30), false to read (command 0x20)
//The transfer is split into chunks of at most 255 blocks, since the sector
//count register is 8 bits wide. Returns errNone, or the error of the first
//chunk that failed.
//Fixes over the previous version:
//  - the starting block is advanced after every chunk (transfers longer than
//    255 blocks used to hit the same blocks over and over)
//  - the completed-block count is actually maintained, and *numDone is written
//    on success as well as on failure
//  - only LBA bits 24..27 go into the top register; bits 28..31 used to spill
//    into the 0xE0 mode/drive-select bits
static Err hddReadWriteBlocks(HddGlobals* g,UInt32 first,UInt32 num,void* bufP,UInt32* numDone,Boolean write){
    
    Err (*func)(volatile void* hddBase,HddCmd* cmd,UInt32 numSectors,UInt8* buf);
    UInt32 numSec,done = 0;
    UInt8* buf = bufP;
    HddCmd cmd;
    Err e = errNone;
    
    
    func = write?hddPrvCmd_write:hddPrvCmd_read;
    
    if(((UInt32)bufP) & 3) Panic("non-word aligned buffer address");
    
    while(num){
        
        numSec = num;
        if(numSec > 255) numSec = 255;
        
        cmd.status_or_cmd = write?0x30:0x20;
        cmd.lba_0_7 = first;
        cmd.lba_8_15 = first >> 8;
        cmd.lba_16_23 = first >> 16;
        cmd.lba_24_31 = ((first >> 24) & 0x0F) | 0xE0;    //NOTE(review): blocks >= 2^28 cannot be addressed this way; callers are assumed to stay below numBlocks
        cmd.numSectors = numSec;
        cmd.err_or_features = 0;
        
        e = func(g->hddBase,&cmd,numSec,buf);
        if(e) break;
        
        buf += 512 * numSec;
        first += numSec;
        num -= numSec;
        done += numSec;
    }
    
    if(numDone) *numDone = done;
    
    return e;
}

//Prepares the hardware interface to the drive:
//  1. temporarily maps the memory controller (physical 0x48000000), programs
//     MCIO0 and MECR, and releases the mapping again
//  2. configures GPIOs 115 and 98 as plain outputs (function 0); these are the
//     lines driven by hddPrvDrivePower and hddPrvDriveReset
//Panics if the memory controller cannot be mapped. The "g" argument is not used.
static void hddPrvIfaceInit(HddGlobals* g){
    
    volatile UInt32* map;        //must be called "map": the MCIO0 / MECR macros index it
    
    map = mmuMapReq(true,0x48000000,1,MAP_PERM_SYS_RW);
    if(map == MMU_INVALID_PTR) Panic("Cannot map in memory controller");
    
    MCIO0 = 0x1460D;
    MECR &=~ 2;            //no PC-cards inserted
    MECR |= 1;            //two PC-card slots
    MECR |= 2;            //PC-card(s) inserted
    
    mmuMapRel((void*)map,1);
    
    gpioSetFunc(115,0);
    gpioSetDir(115,GPIO_DIR_OUT);
    
    gpioSetFunc(98,0);
    gpioSetDir(98,GPIO_DIR_OUT);
    
}

static void* hddPrvThread(void* gP){
    
    HddGlobals* g = gP;
    BlkDevCmd* c;
    UInt32 devID,reqQID,reqH;
    char* buf;
    UInt32 theirMemH;
    
    blkDevCreate("Internal HDD",&devID,&reqQID);
    
    g->blkDevID = devID;
    g->reqQID = reqQID;
    
    hddPrvIfaceInit(g);
    hddPrvDriveInit(g);
    
    while(1){
        
        charsPrintF("getting request\n");
        
        requestQserviceGet(taskGetCurTask(),reqQID,&reqH,(void**)&c);
        
        charsPrintF("request gotten\n");
        charsPrintF("cmd=%d\n",c->cmd);
        
        c->error = errNone;
        
        switch(c->cmd){
            
            case CMD_READ_BLOCKS:
                
                buf = taskMapInTheirMem(c->data.blocks.requestingProcess,c->data.blocks.buffer,c->data.blocks.numBlocks * 512,&theirMemH);
                blkDevNotifyOp(devID,true,false);
                c->error = hddReadWriteBlocks(g,c->data.blocks.firstBlock,c->data.blocks.numBlocks,buf,&c->data.blocks.numBlocks,false);
                blkDevNotifyOp(devID,false,false);
                taskUnmapTheirMem(buf,theirMemH);
                break;
            
            case CMD_WRITE_BLOCKS:
                
                buf = taskMapInTheirMem(c->data.blocks.requestingProcess,c->data.blocks.buffer,c->data.blocks.numBlocks * 512,&theirMemH);
                blkDevNotifyOp(devID,true,true);
                c->error = hddReadWriteBlocks(g,c->data.blocks.firstBlock,c->data.blocks.numBlocks,buf,&c->data.blocks.numBlocks,true);
                blkDevNotifyOp(devID,false,true);
                taskUnmapTheirMem(buf,theirMemH);
                break;
                
            case CMD_QUERY_INFO:
                
                c->data.info.numBlocks = g->numBlocks;
                c->data.info.removable = false;
                c->data.info.inserted = true;
                c->data.info.writeable = true;
                break;
            
            case CMD_QUERY_MANUF_NAME:
                
                MemMove(c->data.manufName,g->manuf,sizeof(g->manuf));
                break;
            
            case CMD_QUERY_PROD_NAME:
                
                MemMove(c->data.prodName,g->prod,sizeof(g->prod));
                break;
            
            case CMD_QUERY_SERIAL_NUM:
                
                MemMove(c->data.serialNum,g->serial,sizeof(g->serial));
                break;
            
            default:
                
                Panic("Unknown cmd to HDD driver");
                break;
        }
        
        requestQserviceDone(reqH);
    }
}

void hddInit(){
    
    Thread* t;
    HddGlobals* g;
    volatile UInt32* hddBase;
    
    g = kHeapAlloc(sizeof(HddGlobals));
    if(!g) Panic("Cannot alloc HDD globals");
    
    hddBase = mmuMapReq(true,0x20000000,1,MAP_PERM_SYS_RW);
    if(hddBase == MMU_INVALID_PTR) Panic("Cannot map in HDD controller");
    
    charsPrintF("hddBase=0x%08lx\n",hddBase);
    
    g->hddBase = (UInt32*)hddBase;
    
    t = taskNewKernelThread(&hddPrvThread,g,PAGE_SIZE,DEFAULT_BLOCK_DRV_PRIORITY);
    taskUnblockThread(t);
    
    charsPrintF("done\n");
}





///////////////// ASM CODE ////////////////////////////////

//Reads one 512-byte sector from the drive's data register into buf.
//R0 = base of the register window, R1 = buf.
//Optimized for speed from this C code:
//void hddPrvReadSec(volatile void* base,UInt8* buf){
//    volatile UInt16* p = base;
//    UInt16* dst = (UInt16*)buf;
//    UInt32 left = 256;
//    p += 5; //point it to I/O address
//    while(left--) *dst++ = *p;
//}
//NOTE(review): the reference code above advances p by 5 halfwords (byte offset
//0x0A), while the assembly below uses byte offset 0x10 - the reference looks stale.
//The advantage of the ASM version is that it stores 32 bytes at once: every loop
//iteration performs 16 halfword reads of the same address, packs them pairwise
//into 8 words (first-read halfword in the low 16 bits) and stores them with a
//single STMIA; 16 iterations make up the 512 bytes.
//buf is written with word stores (hddReadWriteBlocks panics on a non-word-aligned buffer).
static asm void hddPrvReadSec(volatile void* base,UInt8* buf){
    
    STMFD SP!,{R4-R11,LR}    //R4-R11 and LR are used as scratch below
    ADD R0,R0,#0x10            //R0 = address of the data register
    MOV R12,#16                //16 iterations of 32 bytes
    
loop:
    
    //halfwords 0..9 of this 32-byte group (R0 is never advanced: same address every time)
    LDRH R2, [R0]
    LDRH R3, [R0]
    
    LDRH R4, [R0]
    LDRH R5, [R0]
    
    LDRH R6, [R0]
    LDRH R7, [R0]
    
    LDRH R8, [R0]
    LDRH R9, [R0]
    
    LDRH R10,[R0]
    LDRH R11,[R0]
    
    //pack them into words 0..4 (R2-R6), freeing R7-R11 for the remaining reads
    ORR R2, R2, R3, LSL #16
    ORR R3, R4, R5, LSL #16
    ORR R4, R6, R7, LSL #16
    ORR R5, R8, R9, LSL #16
    ORR R6 ,R10,R11,LSL #16
    
    //halfwords 10..15
    LDRH R7, [R0]
    LDRH R8, [R0]
    
    LDRH R9, [R0]
    LDRH R10,[R0]
    
    LDRH R11,[R0]
    LDRH LR, [R0]
    
    //pack them into words 5..7 (R7-R9)
    ORR R7, R7, R8, LSL #16
    ORR R8, R9, R10,LSL #16
    ORR R9, R11,LR, LSL #16
    
    STMIA R1!,{R2-R9}        //32 bytes
    
    SUBS R12,R12,#1
    BNE loop
    
exit:                        //label is not referenced
    
    LDMFD SP!,{R4-R11,LR}
    BX LR
}

//Writes one 512-byte sector from buf to the drive's data register.
//R0 = base of the register window, R1 = buf.
//Optimized for speed from this C code:
//void hddPrvWriteSec(volatile void* base,UInt8* buf){
//    volatile UInt16* p = base;
//    UInt16* src = (UInt16*)buf;
//    UInt32 left = 256;
//    p += 5; //point it to I/O address
//    while(left--) *p = *src++;
//}
//NOTE(review): the reference code above advances p by 5 halfwords (byte offset
//0x0A), while the assembly below uses byte offset 0x10 - the reference looks stale.
//The advantage of the ASM version is that it loads 32 bytes at once: every loop
//iteration fetches 8 words with a single LDMIA and issues 16 halfword writes to
//the same address, low half of each word first; 16 iterations make up the 512 bytes.
//buf is read with word loads (hddReadWriteBlocks panics on a non-word-aligned buffer).
static asm void hddPrvWriteSec(volatile void* base,UInt8* buf){

    STMFD SP!,{R4-R11,LR}    //R4-R11 and LR are used as scratch below
    ADD R0,R0,#0x10            //R0 = address of the data register
    MOV R12,#16                //16 iterations of 32 bytes
    
loop:
    
    LDMIA R1!,{R2-R9}        //32 bytes
    
    //high halves of words 0..2
    MOV R10,R2, LSR #16
    MOV R11,R3, LSR #16
    MOV LR ,R4, LSR #16
    
    //words 0..2: STRH stores the low 16 bits of the register, so low half then high half
    STRH R2, [R0]
    STRH R10,[R0]
    
    STRH R3, [R0]
    STRH R11,[R0]
    
    STRH R4, [R0]
    STRH LR, [R0]
    
    //high halves of words 3..7; R2-R4 are free again once words 0..2 have been written
    MOV R2, R5, LSR #16
    MOV R3, R6, LSR #16
    MOV R4, R7, LSR #16
    MOV R10,R8, LSR #16
    MOV R11,R9, LSR #16
    
    //words 3..7
    STRH R5, [R0]
    STRH R2, [R0]
    
    STRH R6, [R0]
    STRH R3, [R0]
    
    STRH R7, [R0]
    STRH R4, [R0]
    
    STRH R8, [R0]
    STRH R10,[R0]
    
    STRH R9, [R0]
    STRH R11,[R0]
    
    SUBS R12,R12,#1
    BNE loop
    
exit:                        //label is not referenced
    
    LDMFD SP!,{R4-R11,LR}
    BX LR
}