// Profiler.cpp: implementation of the CProfiler class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "archimedes.h"
#include "Profiler.h"
#include "Globals.h"			// gives access to MEMC in RedSquirrel

// disable warning about labels being too long in the debugger viewer, this
// doesn't affect the generated code
#pragma warning (disable : 4786)
#include <list>

// ??? compiler option /GX used because of C++ exception issues with STL


#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

//
// Builds the look up tables for encoded data processing immediates, creates
// the optimiser and opens the ARM disassembly report file. When useTestMemory
// is set a test program is loaded, recompiled from address 0 and the
// resulting armlets are dumped to the debug trace.
//
CProfiler::CProfiler()
{
	TRACE("construct CProfiler \n");

	immediateCarry = new uint32[4096];
	immediateValue = new uint32[4096];

	// construct look up table of real values for encoded immediate 
	// values in data processing instructions, indexed by the 12 bit
	// encoded field (4 bit rotate, 8 bit immediate)
	for(int rotate=0; rotate<16; rotate++)
	{
		for(int immediate=0; immediate<256; immediate++)
		{
			int index = (rotate<<8) | immediate;

			// rotate immediate value right by twice the rotate value
			immediateValue[index] = rorOperator(immediate, rotate<<1);

			// carry table holds bit 31 of the rotated value
			immediateCarry[index] = getBit(immediateValue[index], 31) ? 1 : 0;
		}
	}

	optimiser = new COptimiser();

	// open report file
	BOOL armSuccess = armReportFile.Open(
		_T("D:\\Work\\Project\\armDiss.txt"),
		CFile::modeCreate		|
		CFile::modeWrite		|
		CFile::shareDenyWrite	|
		CFile::typeText
		);

	// the path is hard coded, so make a failure visible instead of
	// silently discarding the result
	if(!armSuccess)
		TRACE("error opening ARM report file! \n");

	if(useTestMemory)
	{
			
		TRACE("PROFILER USING TEST MEMORY\n");

		testMemory = new CTestMemory(0x1000);

		
		if( !testMemory->loadIntoMemory("D:\\Work\\Project\\SourcePrograms\\tests\\armleteg", 0, -1) ) //  64bitadd redunflg  gcdInR0,ff8   sumInR0o   divideO,ff8    gototest
		{
			// nothing was loaded, so there is no program to recompile; running
			// the translator over memory that was never filled would only
			// translate whatever the memory happens to contain
			TRACE("error loading test file! \n");
		}
		else
		{
			recompile(0);

			// ??? debugging, dump disassembled armlets
			// note: this drains armletList via removeHead()
			TRACE("armlet disassembly\n");
			CArmletDisassembler armletDiss;
			int dissCounter = 0;
			while( !armletList->empty() )
			{
				TRACE("0x%x: %s\n", dissCounter, armletDiss.disassemble(0, (Armlet*)armletList->removeHead() ) );
				dissCounter++;
			}
			TRACE("end diss\n");
		}
	}
}

CProfiler::~CProfiler()
{
	// release the immediate operand look up tables built in the constructor
	delete [] immediateValue;
	delete [] immediateCarry;

	// release the optimiser created in the constructor
	delete optimiser;
}

//
// start recompiling a chunk from a given address
//
// Resets the per-chunk translation state, translates ARM instructions one
// word at a time until translateARMtoArmlets() reports the end of the chunk,
// then hands the resulting armlet list to the optimiser.
//

void CProfiler::recompile(uint32 address)
{

	// create linked list to translate armlets into
	armletList = new CLinkedList();

	// create a list to store information about conditional blocks
	conditionalBlockList = new CLinkedList();

	// invalidate previous ARM condition code
	previousARMConditionCode = -1;
	// no previous instruction to effect the condition flags in this block
	flagsAdjusted = FALSE;
	// define pointer to last conditional armlet goto
	lastGoto = NULL;
	// initialise armletCounter to not point to indicate any armlet
	armletCounter = -1;
	// there was no previous armlet
	lastArmlet = NULL;
	// reset basic block counter
	basicBlockCount = 0;
	// record start address of chunk
	startAddress = address;
	// we're not initially in a conditional block
	inConditionalBlock = FALSE;
	
	BOOL inChunk = TRUE;
	while(inChunk)
	{
		inChunk = translateARMtoArmlets(address, readWord(address) );
		address+=4;
	}

	optimiser->optimise(armletList);

	// ??? delete empty linked list (elements removed in optimiser)
	// armletList is left allocated here: the constructor's debug dump still
	// reads it after recompile() returns
	// delete armletList;

	// delete conditional block list: it is only filled during translation and
	// a fresh list is allocated on every call, so free the list object as
	// well as its contents, otherwise one list leaks per recompile
	conditionalBlockList->clear();
	delete conditionalBlockList;
	conditionalBlockList = NULL;

	// ??? go through and delete all data in map
	// addressToArmletNumber.clear();
}

//
// add the necessary armlets for a given ARM instruction to the current chunk
// returns TRUE if the recompiler should continue to the next instruction
// i.e. if the instruction was a non-terminal or conditional-terminal
//
// Conditional execution is handled by emitting a goto with the inverse
// condition in front of a run of instructions sharing one condition code;
// the goto's destination is backpatched when the run ends.
//

BOOL CProfiler::translateARMtoArmlets(uint32 address, uint32 instruction)
{

	// increment basic block counter (the number of instructions for intcheck)
	basicBlockCount++;

	// ??? debugging
	CArmDisassembler diss;

	// disassemble once and reuse the text for both the trace and the report
	CString armDump;
	armDump.Format("0x%x: %s \n ", address, diss.disassemble(address, instruction) );
	TRACE("%s", (LPCTSTR)armDump);

	// only write to the report if the constructor managed to open it
	// (armReportFile is opened with CFile::typeText and written with
	// WriteString, i.e. used as a CStdioFile, whose stream stays NULL when
	// Open() fails)
	if(armReportFile.m_pStream != NULL)
		armReportFile.WriteString(armDump);

	// reset available temps for new instruction
	resetAvailableTemps();

	// associate the ARM address of this instruction with the number
	// of the first armlet in this instruction
	addressToArmletNumber[address] = armletCounter + 1;

	// get this instruction's condition code
	uint8 conditionCode = getField(instruction, 28, 31);

	// handle conditional execution of instructions by generating a
	// goto to the next instruction and attempt to put sequential instructions
	// which all have the same condition code in one comparison of the condition

	// if condition code was different for previous instruction
	// OR the previous instruction adjusted the flags and we're in a conditional block
	if(conditionCode != previousARMConditionCode || (inConditionalBlock && flagsAdjusted) )
	{
		// then end conditional block if there was one and start a new conditional block

		// if a block of conditional instructions has been identified
		// and this instruction's condition code is not NV (as this can't affect things)
		// since this won't affect a conditionally executed block
		if(inConditionalBlock && conditionCode != ccNV)
		{
			// test that we never backpatch an AL conditional execution
			// as it isn't necessary
			if(previousARMConditionCode == ccAL)
				TRACE("ERROR translateARMtoArmlets() backpatching an AL block!! \n");
			
			// at start of previous condition code, a goto will have been added
			// which needs to be set to point to this instruction's armlets so
			// fill in the label for that previous goto to point to this instruction
			lastGoto->value = armletCounter + 1;

			// update conditionalBlock information to point to last instruction
			// in conditional block
			currentConditionalBlockInfo->endArmlet = armletCounter;
		}

		// if this instruction is a branch (not branch link because of r14 adjustments)
		// within the chunk and then we want to generate a conditional goto
		// for the branch, not conditional block stuff

		if( getField(instruction, 24, 27) == 10)
		{
			// branch detected
			// if it's conditionally executed then we want to generate a conditional goto
			if(conditionCode != ccAL && conditionCode != ccNV)
			{
				// get branch offset
				uint32 offset = getField(instruction, 0, 23);
				// determine new PC value
				uint32 destination = (address + 8) + (offset << 2);
				// mask to 26 bit addressable space
				destination &= 0x3FFFFFF;

				// if branch is to internal part of chunk already translated
				if( !(destination > address) && (destination >= startAddress) )
				{
					uint8 gotoOpcode = irGOTOEQ + (conditionCode - ccEQ);
					translateBranchInternal(destination, gotoOpcode);

					// handle generic stuff
					inConditionalBlock = FALSE;
					// invalidate the previous condition code (as recompile() does):
					// no conditional block is open, so a following instruction with
					// the same condition must start its own block rather than be
					// treated as a continuation and translated unconditionally
					previousARMConditionCode = -1;
					// continue recompiling since conditional instruction but skip the rest
					return TRUE;
				}
			}
		}

		// remember the current state so an NV instruction can leave it untouched
		const BOOL wasInConditionalBlock = inConditionalBlock;

		// optionally start a new conditionally executed block (if cc != NV or AL)
		// act on this instruction's condition code
		inConditionalBlock = TRUE; // most cases mark start of a conditional block
		switch( conditionCode )
		{
			// for conditional executions, emit a goto with the inverse condition
			// code of the ARM instruction and no destination (to be backpatched later)
			// mark this armlet as the lastGoto so it can be backpatched
			case ccEQ	:	emitV(irGOTONE, -1, getCondFlags(ccEQ), fNONE); lastGoto = lastArmlet; break;
			case ccNE	:	emitV(irGOTOEQ, -1, getCondFlags(ccNE), fNONE); lastGoto = lastArmlet; break;
			case ccCS	:	emitV(irGOTOCC, -1, getCondFlags(ccCS), fNONE); lastGoto = lastArmlet; break;
			case ccCC	:	emitV(irGOTOCS, -1, getCondFlags(ccCC), fNONE); lastGoto = lastArmlet; break;
			case ccMI	:	emitV(irGOTOPL, -1, getCondFlags(ccMI), fNONE); lastGoto = lastArmlet; break;
			case ccPL	:	emitV(irGOTOMI, -1, getCondFlags(ccPL), fNONE); lastGoto = lastArmlet; break;
			case ccVS	:	emitV(irGOTOVC, -1, getCondFlags(ccVS), fNONE); lastGoto = lastArmlet; break;
			case ccVC	:	emitV(irGOTOVS, -1, getCondFlags(ccVC), fNONE); lastGoto = lastArmlet; break;
			case ccHI	:	emitV(irGOTOLS, -1, getCondFlags(ccHI), fNONE); lastGoto = lastArmlet; break;
			case ccLS	:	emitV(irGOTOHI, -1, getCondFlags(ccLS), fNONE); lastGoto = lastArmlet; break;
			case ccGE	:	emitV(irGOTOLT, -1, getCondFlags(ccGE), fNONE); lastGoto = lastArmlet; break;
			case ccLT	:	emitV(irGOTOGE, -1, getCondFlags(ccLT), fNONE); lastGoto = lastArmlet; break;
			case ccGT	:	emitV(irGOTOLE, -1, getCondFlags(ccGT), fNONE); lastGoto = lastArmlet; break;
			case ccLE	:	emitV(irGOTOGT, -1, getCondFlags(ccLE), fNONE); lastGoto = lastArmlet; break;
			case ccAL	:
				// don't generate a goto to after this condition since it's AL
				inConditionalBlock = FALSE;
				break;
			case ccNV	:
				// don't generate any code for this ARM instruction so return
				// note, doesn't end (or start) a conditional block either, so put
				// the flag back: no goto was emitted here, and leaving it TRUE
				// would make a later instruction backpatch a stale lastGoto
				// note: if on later ARM architecture then will generate code
				inConditionalBlock = wasInConditionalBlock;
				return TRUE;
		}

		// if we've just allocated a GOTO for the start of a conditional block
		if(inConditionalBlock && conditionCode != ccNV)
		{
			// create new block info
			currentConditionalBlockInfo = new ConditionalBlockInfo;

			// add it to the linked list
			conditionalBlockList->addToTail(currentConditionalBlockInfo);

			currentConditionalBlockInfo->conditionCode = conditionCode;
			// point start of block to the goto instruction
			currentConditionalBlockInfo->startArmlet = armletCounter;
		}

		// no longer care if flags adjusted, since we've started/ended block anyway
		flagsAdjusted = FALSE;
	}

	BOOL continueTranslation = FALSE;
	// decode instruction based on bits 24 - 27
	switch( getField(instruction, 24, 27) )
	{
		case 0	:	continueTranslation = translateMultiplyOrDataProcessing(address, instruction); break;
		case 1	:	continueTranslation = translateSingleDataSwapOrDataProcessing(address, instruction); break;
		case 2	:	continueTranslation = translateDataProcessing(address, instruction); break;
		case 3	:	continueTranslation = translateDataProcessing(address, instruction); break;
		case 4	:	continueTranslation = translateSingleDTImmOffsetPostIndex(address, instruction); break;
		case 5	:	continueTranslation = translateSingleDTImmOffsetPreIndex(address, instruction); break;
		case 6	:	continueTranslation = translateSingleDTRegOffsetPostIndex(address, instruction); break;
		case 7	:	continueTranslation = translateSingleDTRegOffsetPreIndex(address, instruction); break;
		case 8	:	continueTranslation = translateBlockDataTransfer(address, instruction); break;
		case 9	:	continueTranslation = translateBlockDataTransfer(address, instruction); break;
		case 10	:	continueTranslation = translateBranch(address, instruction); break;
		case 11	:	continueTranslation = translateBranchWithLink(address, instruction); break;
		case 12 :	continueTranslation = translateCoProDTPreIndex(address, instruction); break;
		case 13 :	continueTranslation = translateCoProDTPostIndex(address, instruction); break;
		case 14	:	continueTranslation = translateCoProRegTransferOrDataOperation(address, instruction); break;
		case 15	:	continueTranslation = translateSoftwareInterrupt(address, instruction); break;
	}

	// continueTranslation denotes whether the chunk should end if this
	// instruction is executed. If it is conditionally executed then
	// continue to recompile the chunk
	if(!continueTranslation && conditionCode != ccAL)
	{
		continueTranslation = TRUE;
	}

	// store the condition code of this instruction for when we look at the next one
	previousARMConditionCode = conditionCode;

	return continueTranslation;
}

//////////////////////////////////////////////////////////////////////
// Translation functions
//////////////////////////////////////////////////////////////////////

//
// Opcode group 0 holds both multiplies and data processing instructions;
// work out which one this is and hand it to the matching translator
//

BOOL CProfiler::translateMultiplyOrDataProcessing(uint32 address, uint32 instruction)
{
	// anything that is not the extended encoding is plain data processing
	if( !isExtendedInstruction(instruction) )
		return translateDataProcessing(address, instruction);

	// the extended encoding in this group is a multiply
	return translateMultiply(address, instruction);
}

//
// Opcode group 1 holds both single data swaps and data processing
// instructions; work out which one this is and hand it to the matching
// translator
//

BOOL CProfiler::translateSingleDataSwapOrDataProcessing(uint32 address, uint32 instruction)
{
	// anything that is not the extended encoding is plain data processing
	if( !isExtendedInstruction(instruction) )
		return translateDataProcessing(address, instruction);

	// the extended encoding in this group is a single data swap
	return translateSingleDataSwap(address, instruction);
}

//
// Translate data processing instructions, AND, ADD, SUB, EOR etc.
//
// Emits the armlets that evaluate operand 2 (a rotated immediate, or a
// register with an immediate or register specified shift) followed by the
// armlet(s) for the operation itself.
// Returns the result of checkRdForPC() for the destination register.
//
// Note on goto targets: when the target expression is evaluated,
// armletCounter is the number of the last armlet emitted, so the goto itself
// becomes armlet armletCounter + 1 and "armletCounter + N" skips the N - 2
// armlets following the goto.
//

BOOL CProfiler::translateDataProcessing(uint32 address, uint32 instruction)
{
	uint8 op1, op2;

	uint8 opcode = getField(instruction, 21, 24);

	// mask 0x0CFC selects the arithmetic opcodes (2-7 and 10-11, i.e. SUB, RSB,
	// ADD, ADC, SBC, RSC, CMP, CMN); the rest are the logical opcodes
	const BOOL arithmeticOpcode = ( (0x0CFC & (1 << opcode)) != 0 );

	//////////
	// sort out operand 1
	//////////

	op1 = getField(instruction, 16, 19);
	
	// if second operand is register (ok if it's immediate)
	if( !getBit(instruction, 25) )
	{
		// if op1 is r15 (we can statically determine it)
		if(op1 == 15)
		{
			// if shift on op2 is register specified (bit 4)
			if( getBit(instruction, 4) )
				emit1(irMOVC, vPC, address + 8 + 4, fNONE, fNONE);
			else
				emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
		}
	}

	//////////
	// sort out operand 2
	//////////

	// if second operand is immediate
	if( getBit(instruction, 25) )
	{
		// the look up tables are indexed by the 12 bit encoded field
		// (rotate in bits 8-11, immediate in bits 0-7), not by the decoded value
		uint32 encodedImmediate = getField(instruction, 0, 11);
		uint32 immediate = immediateValue[encodedImmediate];
		// get immediate value
		uint8 tempVar = useNextTemp();
		emit1(irMOVC, tempVar, immediate, fNONE, fNONE);
		op2 = tempVar;

		// if not arithmetic and S flag set then adjust the carry flag for the
		// immediate; a rotate of 0 leaves the carry flag unchanged on the ARM,
		// so only do this when the rotate field is non-zero
		if( !arithmeticOpcode && getBit(instruction, 20) && getField(instruction, 8, 11) != 0 )
		{
			if( immediateCarry[encodedImmediate] )
				emit1(irMOVC, vCFLAG, 1, fNONE, fC);
			else
				emit1(irMOVC, vCFLAG, 0, fNONE, fC);
		}
		
	}
	else
	{
		// second operand is register

		// get register number to be shifted
		uint8 rm = getField(instruction, 0, 3);

		// there's no point in arithmetic instructions calculating the C
		// flag for barrel shifting since it's hammered again by the
		// calculation itself, so only logical opcodes with the S flag set
		// take the carry from the shifter
		BOOL sFlagShifting = getBit(instruction, 20) && !arithmeticOpcode;

		// flags written by the shift armlet itself
		uint8 shiftOutFlags = fNONE;
		if(sFlagShifting)
			shiftOutFlags = fC;

		// if shift on op2 is register specified (bit 4)
		if( getBit(instruction, 4) )
		{
			//////////////////////////////////////////
			// SHIFT on op2 register is REGISTER SPECIFIED
			//////////////////////////////////////////
			
			// if rm is r15 (we need to fetch PSR and statically determine PC)
			if(rm == 15)
				emit1(irGETPC, vPC, address + 12, fALL, fNONE);

			// get register containing amount to shift by
			uint8 rs = getField(instruction, 8, 11);

			// if rs is r15 then value of PC is address + 8
			if(rs == 15)
			{
				uint8 adjustedPC = useNextTemp();
				emit1(irMOVC, adjustedPC, address + 8, fNONE, fNONE);
				rs = adjustedPC;
			}

			// cope with fact that for LSL, LSR, ASR, only bottom byte of
			// shift register value must be used (strictly speaking this is
			// also the case for ROR but it won't have any effect so optimise
			// this out. remember - leaves rs unadjusted!
			uint8 shiftType = getField(instruction, 5, 6);
			uint8 tempValue = useNextTemp();
			if(shiftType != ROR)
			{
				emit1(irMOVC, tempValue, 0xff, fNONE, fNONE);
				emit3(irAND, tempValue, rs, tempValue, fNONE, fNONE);
			}

			// NOTE(review): the "rs >= 32" paths below write vCFLAG even when
			// sFlagShifting is FALSE, which looks like it clobbers the carry
			// for non-S and arithmetic instructions - confirm before relying on it

			// decode shift type
			switch(shiftType)
			{
				case LSL :
				{
					uint8 comparator = useNextTemp();
					emit1(irMOVC, comparator, 31, fNONE, fNONE);
					emit2(irCMP, tempValue, comparator, fNONE, fNONE);
					// targets are armlet numbers, so relative to armletCounter
					emitV(irGOTOGT, armletCounter + 4, fNONE, fNONE); // goto >= 32

					// rs <= 31
					emit3(irLSL, comparator, rm, tempValue, fNONE, shiftOutFlags);
					emitV(irGOTO, armletCounter + 11, fNONE, fNONE);  // end of instruction

					// rs >= 32
					emit1(irMOVC, vCFLAG, 0, fNONE, fC); // clear c flag in case > 32 or it's not to be set
					emit1(irMOVC, comparator, 32, fNONE, fNONE);
					emit2(irCMP, comparator, tempValue, fNONE, fNONE);
					emitV(irGOTONE, armletCounter + 6, fNONE, fNONE); // goto rs == 32 OR rs > 32
					
					// rs == 32
					uint8 testBit = useNextTemp();
					emit1(irMOVC, testBit, 1, fNONE, fNONE);
					// if bit 0 of rm is set, set C flag
					emit2(irTST, rm, testBit, fNONE, fNONE);
					emitV(irGOTOEQ, armletCounter + 3, fNONE, fNONE); // goto rs == 32 or RS > 32
					emit1(irMOVC, vCFLAG, 1, fNONE, fC);
					
					// rs == 32 OR rs > 32
					// set result = 0
					emit1(irMOVC, comparator, 0, fNONE, fNONE);

					op2 = comparator;
					break;
				}
				case LSR :
				{
					uint8 comparator = useNextTemp();
					emit1(irMOVC, comparator, 31, fNONE, fNONE);
					emit2(irCMP, tempValue, comparator, fNONE, fNONE);
					emitV(irGOTOGT, armletCounter + 4, fNONE, fNONE); // goto >= 32
					
					// rs <= 31
					emit3(irLSR, comparator, rm, tempValue, fNONE, shiftOutFlags);
					emitV(irGOTO, armletCounter + 11, fNONE, fNONE);  // end of instruction
					
					// rs >= 32
					emit1(irMOVC, vCFLAG, 0, fNONE, fC); // clear c flag in case > 32 or it's not to be set
					emit1(irMOVC, comparator, 32, fNONE, fNONE);
					// register/register compare, so emit2 as in the LSL case
					emit2(irCMP, comparator, tempValue, fNONE, fNONE);
					emitV(irGOTONE, armletCounter + 6, fNONE, fNONE); // goto rs == 32 OR rs > 32
					
					// rs == 32
					uint8 testBit = useNextTemp();
					// LSR by 32 shifts bit 31 into the carry, so test bit 31
					// (same mask as the immediate LSR #32 case)
					emit1(irMOVC, testBit, 0x80000000, fNONE, fNONE);
					// if bit 31 of rm is set, set C flag
					emit2(irTST, rm, testBit, fNONE, fNONE);
					emitV(irGOTOEQ, armletCounter + 3, fNONE, fNONE); // goto rs == 32 or RS > 32
					emit1(irMOVC, vCFLAG, 1, fNONE, fC);
					
					// rs == 32 OR rs > 32
					// set result = 0
					emit1(irMOVC, comparator, 0, fNONE, fNONE);

					op2 = comparator;
					break;
				}
				case ASR :
				{
					uint8 comparator = useNextTemp();
					emit1(irMOVC, comparator, 31, fNONE,  fNONE);
					emit2(irCMP, tempValue, comparator, fNONE, fNONE);
					emitV(irGOTOGT, armletCounter + 4, fNONE, fNONE); // goto >= 32
					
					// rs <= 31
					emit3(irASR, comparator, rm, tempValue, fNONE, shiftOutFlags);
					emitV(irGOTO, armletCounter + 10, fNONE, fNONE); // goto end of instruction

					// rs >= 32
					// test bit 31 of rm
					uint8 testBit = useNextTemp();
					emit1(irMOVC, testBit, 0x80000000, fNONE, fNONE);
					emit2(irTST, rm, testBit, fNONE, fNONE);
					emitV(irGOTOEQ, armletCounter + 5, fNONE, fNONE);   // goto rm[31] = 0

					// rm[31] = 1
					emit1(irMOVC, vCFLAG, 1, fNONE, fC);
					emit1(irMOVC, comparator, 0xffffffff, fNONE, fNONE);
					emitV(irGOTO, armletCounter + 4, fNONE, fNONE); // goto end of instruction

					// rm[31] = 0
					emit1(irMOVC, vCFLAG, 0, fNONE, fC);
					emit1(irMOVC, comparator, 0, fNONE, fNONE);

					op2 = comparator;
					break;
				}
				case ROR :
				{
					op2 = useNextTemp();
					emit3(irROR, op2, rm, rs, fNONE, shiftOutFlags);
					break;
				}
			} // end switch

		}
		else
		{
			//////////////////////////////////////////
			// SHIFT on op2 register is IMMEDIATE SPECIFIED
			//////////////////////////////////////////

			// if rm is r15 (we need to fetch PSR and statically determine PC)
			if(rm == 15)
				emit1(irGETPC, vPC, address + 8, fALL, fNONE);

			// get immediate amount to shift by
			uint8 shiftAmount = getField(instruction, 7, 11);

			// deal with special cases where shift amount is 0 e.g. LSR#32, RRX
			if(shiftAmount == 0)
			{
				
				switch( getField(instruction, 5, 6) )
				{
					case LSL : op2 = rm; break;
					case LSR : 
					{
						// represents LSR #32
						uint8 tempVar = useNextTemp();
						if(sFlagShifting)
						{	
							// if bit 31 of register being shifted is set
							emit1(irMOVC, tempVar, 0x80000000, fNONE, fNONE);
							emit2(irTST, rm, tempVar, fNONE, fNONE);
							emitV(irGOTOEQ, armletCounter + 4, fNONE, fNONE);
							// set carry flag
							emit1(irMOVC, vCFLAG, 1, fNONE, fC);
							emitV(irGOTO, armletCounter + 3, fNONE, fNONE);
							// else clear the carry flag
							emit1(irMOVC, vCFLAG, 0, fNONE, fC);
						}
						// result of shift is 0
						emit1(irMOVC, tempVar, 0, fNONE, fNONE);
						op2 = tempVar;
						break;
					}
					case ASR :
					{
						// carry = getBit(regValue, 31);
						// result = asrOperator(regValue, 31);
						
						// represents ASR #32 (set all bits to value of bit 31)
						uint8 tempVar = useNextTemp();
						if(sFlagShifting)
						{
							// if bit 31 set
							emit1(irMOVC, tempVar, 0x80000000, fNONE, fNONE);
							emit2(irTST, rm, tempVar, fNONE, fNONE);
							emitV(irGOTOEQ, armletCounter + 5, fNONE, fNONE);
							// then set C flag and set result to all bits set
							emit1(irMOVC, vCFLAG, 1, fNONE, fC);
							emit1(irMOVC, tempVar, 0xffffffff, fNONE, fNONE);
							emitV(irGOTO, armletCounter + 4, fNONE, fNONE);
							// else clear C flag and set result to all bits clear
							emit1(irMOVC, vCFLAG, 0, fNONE, fC);
							emit1(irMOVC, tempVar, 0, fNONE, fNONE);
						}
						else
						{
							// if bit 31 set
							emit1(irMOVC, tempVar, 0x80000000, fNONE, fNONE);
							emit2(irTST, rm, tempVar, fNONE, fNONE);
							emitV(irGOTOEQ, armletCounter + 4, fNONE, fNONE);
							// set result to all bits set
							emit1(irMOVC, tempVar, 0xffffffff, fNONE, fNONE);
							emitV(irGOTO, armletCounter + 3, fNONE, fNONE);
							// else set result to all bits clear
							emit1(irMOVC, tempVar, 0, fNONE, fNONE);
						}
						op2 = tempVar;
						break;
					}
					case ROR :
					{
						// ROR #0 = RRX
						// RRX rotates through the carry, so it always reads C
						// (declared as an input flag like ADC/SBC do) and
						// writes C only when the shifter sets the flags
						uint8 tempVar = useNextTemp();
						emit2(irRRX, tempVar, rm, fC, shiftOutFlags);
						op2 = tempVar;
						break;
					}
				}
			}
			else
			{
				// shiftAmount > 0

				// load the constant shift amount (MOVC takes a constant, so emit1)
				uint8 tempVar = useNextTemp();
				emit1(irMOVC, tempVar, shiftAmount, fNONE, fNONE);

				uint8 tempVar2 = useNextTemp();

				switch( getField(instruction, 5, 6) )
				{
					case LSL : emit3(irLSL, tempVar2, rm, tempVar, fNONE, shiftOutFlags); break;
					case LSR : emit3(irLSR, tempVar2, rm, tempVar, fNONE, shiftOutFlags); break;
					case ASR : emit3(irASR, tempVar2, rm, tempVar, fNONE, shiftOutFlags); break;
					case ROR : emit3(irROR, tempVar2, rm, tempVar, fNONE, shiftOutFlags); break;
				}

				op2 = tempVar2;
			}
		}
	}

	// operands 1 and 2 decoded

	// decode destination reg
	uint8 rd = getField(instruction, 12, 15);

	// decode flag adjustments for instructions
	uint8 outFlags;
	// if S flag is set
	BOOL sFlag = getBit(instruction, 20);
	if( sFlag )
	{
		// arithmetic adjust all flags
		// logic instructions adjust N and Z flags (C changed from barrel shifter)
		if( arithmeticOpcode )
			outFlags = fALL;
		else
			outFlags = fN | fZ;
	}
	else
	{
		outFlags = fNONE;
	}


	// decode instruction opcode
	switch( opcode )
	{
		case 0  : emit3(irAND, rd, op1, op2, fNONE, outFlags); break; // and
		case 1  : emit3(irEOR, rd, op1, op2, fNONE, outFlags); break; // eor
		case 2  : emit3(irSUB, rd, op1, op2, fNONE, outFlags); break; // sub
		case 3  : emit3(irSUB, rd, op2, op1, fNONE, outFlags); break; // rsb
		case 4  : emit3(irADD, rd, op1, op2, fNONE, outFlags); break; // add
		case 5  : emit3(irADC, rd, op1, op2, fC, outFlags); break; // adc
		case 6  : emit3(irSBC, rd, op1, op2, fC, outFlags); break; // sbc
		case 7  : emit3(irSBC, rd, op2, op1, fC, outFlags); break; // rsc
		case 8  : checkPBit(address, instruction, op1, op2, outFlags); break; // tst
		case 9  : checkPBit(address, instruction, op1, op2, outFlags); break; // teq
		case 10 : checkPBit(address, instruction, op1, op2, outFlags); break; // cmp
		case 11 : checkPBit(address, instruction, op1, op2, outFlags); break; // cmn
		case 12 : emit3(irORR, rd, op1, op2, fNONE, outFlags); break; // orr
		case 13 : emit3(irMOV, rd, op2, vUNUSED, fNONE, outFlags); break; // mov
		case 14 :
		{
			// bic: rd = op1 AND NOT op2, so AND with the inverted operand
			uint8 invertedVar = useNextTemp();
			emit3(irMVN, invertedVar, op2, vUNUSED, fNONE, fNONE);
			emit3(irAND, rd, op1, invertedVar, fNONE, outFlags);
			break;
		}
		case 15 : emit3(irMVN, rd, op2, vUNUSED, fNONE, outFlags); break; // mvn
	}

	return checkRdForPC(rd, sFlag, address);
}

//
// For TEQ,TST,CMP,CMN if they are P variant then adjust PSR
//
// The P variant is recognised by the Rd field (bits 12-15) being 0xf. For it
// the ALU result is calculated into a temp and then either written to the
// whole PSR (non-user modes, after which the chunk is left) or only to the
// N, Z, C and V flags (user mode). Otherwise the plain comparison armlet is
// emitted.
//

void CProfiler::checkPBit(uint32 address, uint32 instruction, uint8 op1, uint8 op2, uint8 outFlags)
{
	// if they are the P variant
	if( getField(instruction, 12, 15) == 0xf )
	{
		// calculate result
		// the PSR is loaded from the ALU result, so CMP/CMN need the real
		// subtraction/addition result; a compare armlet produces no value
		uint8 result = useNextTemp();
		switch( getField(instruction, 21, 24) )
		{
			case 8	: emit3(irAND, result, op1, op2, fNONE, outFlags); break; // tst
			case 9  : emit3(irEOR, result, op1, op2, fNONE, outFlags); break; // teq
			case 10 : emit3(irSUB, result, op1, op2, fNONE, outFlags); break; // cmp
			case 11 : emit3(irADD, result, op1, op2, fNONE, outFlags); break; // cmn
		}

		// check user mode
		uint8 compareVar = useNextTemp();
		emit1(irMOVC, compareVar, USR_MODE, fNONE, fNONE);
		emit2(irCMP, vMODE, compareVar, fNONE, fNONE);
		// if in user mode then skip the privileged sequence; the destination is
		// backpatched below so it cannot get out of step with the number of
		// armlets emitted in between (emitInterruptCheck emits several)
		emitV(irGOTOEQ, 0xffffffff, fNONE, fNONE);	// to be backpatched
		Armlet* userModeGoto = lastArmlet;	// store ref to the goto

		// priviledged mode so update NZCV, I, F, mode
		// but don't adjust the PC
		emit3(irMOV, vPC, result, vUNUSED, fNONE, fNONE);
		// mask out adjustments to the PC
		emit1(irMOVC, result, 0xfc000003, fNONE, fNONE);
		emit3(irAND, vPC, vPC, result, fNONE, fNONE);
		// stick current PC in it's place
		emit1(irMOVC, result, address + 8, fNONE, fNONE);
		emit3(irORR, vPC, vPC, result, fNONE, fNONE);
		// signal updated flags
		emit(irSETPC, fALL, fNONE);
		emitInterruptCheck(address, FALSE);
		emitV(irLEAVE, leavePSRChanged, fALL, fNONE);
		
		// user mode
		// point the skip goto at the first user mode armlet
		userModeGoto->value = armletCounter + 1;

		// only adjust NZCV
		uint8 flagVar = useNextTemp();
		uint8 shiftAndVal = useNextTemp();

		// N flag (bit 31 of result)
		emit1(irMOVC, shiftAndVal, 31, fNONE, fNONE);
		emit3(irLSR, flagVar, result, shiftAndVal, fNONE, fNONE);
		emit1(irMOVC, shiftAndVal, 1, fNONE, fNONE);
		emit3(irAND, vNFLAG, flagVar, shiftAndVal, fNONE, fN);

		// Z flag (bit 30 of result)
		emit1(irMOVC, shiftAndVal, 30, fNONE, fNONE);
		emit3(irLSR, flagVar, result, shiftAndVal, fNONE, fNONE);
		emit1(irMOVC, shiftAndVal, 1, fNONE, fNONE);
		emit3(irAND, vZFLAG, flagVar, shiftAndVal, fNONE, fZ);

		// C flag (bit 29 of result)
		emit1(irMOVC, shiftAndVal, 29, fNONE, fNONE);
		emit3(irLSR, flagVar, result, shiftAndVal, fNONE, fNONE);
		emit1(irMOVC, shiftAndVal, 1, fNONE, fNONE);
		emit3(irAND, vCFLAG, flagVar, shiftAndVal, fNONE, fC);

		// V flag (bit 28 of result)
		emit1(irMOVC, shiftAndVal, 28, fNONE, fNONE);
		emit3(irLSR, flagVar, result, shiftAndVal, fNONE, fNONE);
		emit1(irMOVC, shiftAndVal, 1, fNONE, fNONE);
		emit3(irAND, vVFLAG, flagVar, shiftAndVal, fNONE, fV);

		// continue ok
	}
	else
	{
		// emit normal instructions
		switch( getField(instruction, 21, 24) )
		{
			case 8	: emit2(irTST, op1, op2, fNONE, outFlags); break; // tst
			case 9  : emit2(irTEQ, op1, op2, fNONE, outFlags); break; // teq
			case 10 : emit2(irCMP, op1, op2, fNONE, outFlags); break; // cmp
			case 11 : emit2(irCMN, op1, op2, fNONE, outFlags); break; // cmn
		}
	}
}


//
// Translate Single Data Transfer with Immediate-specified Post-Indexing (therefore implied writeback)
//
// Emits: a 26 bit address check on the base register, the load or store
// itself, the base register update (only when the access succeeded), the
// T bit handling and finally the data abort exit for a failed access.
// Returns FALSE when the instruction loads the PC (the chunk ends), TRUE
// otherwise.
//


BOOL CProfiler::translateSingleDTImmOffsetPostIndex(uint32 address, uint32 instruction)
{
	
	// if writeback flag (bit 21) is set in a post-indexed instruction
	// then signifies T bit being set (since writeback is implicit in post index)
	if( getBit(instruction, 21) )
	{
		emit(irCLEARTRANS, fNONE, fNONE);
	}

	// get base address reg
	uint8 rn = getField(instruction, 16, 19);
	// get register to load to/store from
	uint8 rd = getField(instruction, 12, 15);

	uint8 successFlag = useNextTemp();		// temp for returning RAM access success

	// test rn for 26 bit address limit
	uint8 addressTest = useNextTemp();
	emit1(irMOVC, addressTest, 0xFC000000, fNONE, fNONE);
	uint8 addressTestResult = useNextTemp();
	emit3(irAND, addressTestResult, rn, addressTest, fNONE, fNONE);
	emit1(irMOVC, addressTest, 0, fNONE, fNONE);
	emit2(irCMP, addressTestResult, addressTest, fNONE, fNONE);
	// if address is ok then skip leave
	// (goto + MOVC + 5 interrupt check armlets + LEAVE = next armlet is +9)
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // represents 5 armlets
	emitV(irLEAVE, leaveAddressException, fALL, fNONE);

	// check load/store
	if( getBit(instruction, 20) )
	{
		// load

		// if byte else word
		if( getBit(instruction, 22) )
			emit3(irLDB, rd, rn, successFlag, fNONE, fNONE);
		else
			emit3(irLDW, rd, rn, successFlag, fNONE, fNONE);
	}
	else
	{
		// store

		// if rd==PC
		if(rd == 15)
		{
			emit1(irGETPC, vPC, address + 12, fALL, fNONE);
			// if byte quantity
			if( getBit(instruction, 22) )
			{
				uint8 tempByte = useNextTemp();
				emit1(irMOVC, tempByte, 0xff, fNONE, fNONE);
				emit3(irAND, vPC, vPC, tempByte, fNONE, fNONE);
			}
		}

		// if byte else word
		if( getBit(instruction, 22) )
			emit3(irSTB, rd, rn, successFlag, fNONE, fNONE);
		else
			emit3(irSTW, rd, rn, successFlag, fNONE, fNONE);
	}

	// 0 = successful access
	// if not successful then jump to later on (same test for load and store)
	uint8 testValue = useNextTemp();
	emit1(irMOVC, testValue, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, testValue, fNONE, fNONE);
	emitV(irGOTONE, 0xffffffff, fNONE, fNONE);	// to be backpatched
	Armlet* unsuccessfulGoto = lastArmlet;	// pointer to goto for when access fails

	// if access was successful, continue with updating base address...

	// get immediate offset
	uint32 offset = getField(instruction, 0, 11); 
	// if offset != 0
	if(offset)
	{
		// update base address (only if mem access went ok)
		uint8 offsetVar = useNextTemp();
		emit1(irMOVC, offsetVar, offset, fNONE, fNONE);	
		// check add/sub offset
		// note, rn must not be r15 with writeback so don't worry about it (see ARM610 datasheet p38)
		if( getBit(instruction, 23) )
		{
			// add offset
			emit3(irADD, rn, rn, offsetVar, fNONE, fNONE);
		}
		else
		{
			// subtract offset
			emit3(irSUB, rn, rn, offsetVar, fNONE, fNONE);
		}
	}

	// if access was unsuccessful continues here (for store)... 

	// unclear trans if T bit set and not in priviledged mode
	if( getBit(instruction, 21) )
	{
		// if not in user mode, clear trans
		uint8 tempVar = useNextTemp();
		emit1(irMOVC, tempVar, USR_MODE, fNONE, fNONE);
		emit2(irCMP, tempVar, vMODE, fNONE, fNONE);
		emitV(irGOTOEQ, armletCounter + 3, fNONE, fNONE); // skip cleartrans if in user mode
		emit(irSETTRANS, fNONE, fNONE);
	}

	// backpatch goto for unsuccessful memory access
	if( getBit(instruction, 20) || !getBit(instruction, 21) )
	{
		// if load or if T bit not set
		// T handling only done if load was successful so set goto to after T handling
		unsuccessfulGoto->value = armletCounter + 1;
	}
	else
	{
		// if store and T bit set
		// T handling done whether store was successful or not so set goto to before T handling
		// (the T handling above is 4 armlets, the last of which is armletCounter)
		unsuccessfulGoto->value = armletCounter - 3;
	}

	// if access was unsuccessful continues here (for load)...

	// check for unsuccessful access
	// if mem access was unsuccessful then C function will have triggered data
	// abort or address exception, so now we have to leave, returning the success variable
	uint8 abortTestValue = useNextTemp();
	emit1(irMOVC, abortTestValue, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, abortTestValue, fNONE, fNONE);
	// successFlag == 0 means the access succeeded, so the skip must be taken
	// on EQ; with NE the leave below would run after every successful access
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);	// if successful then skip this leave bit
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // represents 5 armlets
	emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);

	// if rd was PC and load instruction (and hence PC has changed)
	if(rd == 15 && getBit(instruction, 20) )
	{
		emitInterruptCheck(address, FALSE); // represents 5 armlets
		emitV(irLEAVE, leaveDynamicPC, fALL, fNONE);
		return FALSE;
	}
	else
	{
		return TRUE;
	}
}


//
// Translate Single Data Transfer with Immediate-specified Pre-Indexing
//


BOOL CProfiler::translateSingleDTImmOffsetPreIndex(uint32 address, uint32 instruction)
{
	// Emits the armlets for a single data transfer (load/store, word/byte)
	// whose 12 bit immediate offset is applied to the base register BEFORE
	// the memory access, with optional writeback of the computed address.
	//
	// address     - ARM address of the instruction being translated
	// instruction - the instruction word
	//
	// Returns FALSE when rd is r15 and the instruction is a load (a
	// leaveDynamicPC leave has been emitted), otherwise TRUE.

	// get base address reg
	uint8 rn = getField(instruction, 16, 19);
	// get register to load to/store from
	uint8 rd = getField(instruction, 12, 15);

	// decode the control bits once
	BOOL isLoad		= getBit(instruction, 20);
	BOOL writeBack	= getBit(instruction, 21);
	BOOL byteAccess	= getBit(instruction, 22);
	BOOL addOffset	= getBit(instruction, 23);

	uint8 successFlag = useNextTemp();		// temp for returning RAM access success

	// precalculate the address
	uint8 addressVar = useNextTemp();
	// get immediate offset
	uint32 offset = getField(instruction, 0, 11);

	if(rn == 15)
	{
		// base is the PC so the address can be determined statically
		uint32 effectiveAddress;
		if(addOffset)
			effectiveAddress = address + 8 + offset;
		else
			effectiveAddress = (address + 8) - offset;
		emit1(irMOVC, addressVar, effectiveAddress, fNONE, fNONE);
	}
	else if(offset)
	{
		// addressVar = rn +/- offset
		uint8 offsetVar = useNextTemp();
		emit1(irMOVC, offsetVar, offset, fNONE, fNONE);
		if(addOffset)
			emit3(irADD, addressVar, rn, offsetVar, fNONE, fNONE);
		else
			emit3(irSUB, addressVar, rn, offsetVar, fNONE, fNONE);
	}
	else
	{
		// offset == 0 so the address is rn itself, use rn directly
		addressVar = rn;
	}

	// test addressVar for 26 bit address limit
	uint8 addressTest = useNextTemp();
	emit1(irMOVC, addressTest, 0xFC000000, fNONE, fNONE);
	uint8 addressTestResult = useNextTemp();
	emit3(irAND, addressTestResult, addressVar, addressTest, fNONE, fNONE);
	emit1(irMOVC, addressTest, 0, fNONE, fNONE);
	emit2(irCMP, addressTestResult, addressTest, fNONE, fNONE);
	// if address is ok then skip the leave sequence below
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
	emitV(irLEAVE, leaveAddressException, fALL, fNONE);

	// emit the memory access itself
	if(isLoad)
	{
		// if byte else word
		if(byteAccess)
			emit3(irLDB, rd, addressVar, successFlag, fNONE, fNONE);
		else
			emit3(irLDW, rd, addressVar, successFlag, fNONE, fNONE);
	}
	else
	{
		// storing r15: fetch the PC (address + 12) through irGETPC first
		if(rd == 15)
		{
			emit1(irGETPC, vPC, address + 12, fALL, fNONE);
			// a byte store only needs the bottom 8 bits
			if(byteAccess)
			{
				uint8 tempByte = useNextTemp();
				emit1(irMOVC, tempByte, 0xff, fNONE, fNONE);
				emit3(irAND, vPC, vPC, tempByte, fNONE, fNONE);
			}
		}

		// if byte else word
		if(byteAccess)
			emit3(irSTB, rd, addressVar, successFlag, fNONE, fNONE);
		else
			emit3(irSTW, rd, addressVar, successFlag, fNONE, fNONE);
	}

	// 0 = successful access
	// if successful then jump over the data abort leave
	uint8 testValue = useNextTemp();
	emit1(irMOVC, testValue, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, testValue, fNONE, fNONE);
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);		// skip leave if successful

	// leaving code since access unsuccessful
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
	emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);

	// successful, so...

	// optional writeback
	// when the offset was 0 addressVar IS rn, so there is nothing to move
	// (this is the same guard translateSingleDTRegOffsetPreIndex applies)
	if(writeBack && rn != addressVar)
	{
		// don't worry about rn==r15 since according to ARM610 datasheet
		// "write-back shall not be specified if r15 is specified as the base register"
		emit3(irMOV, rn, addressVar, vUNUSED, fNONE, fNONE);
	}

	// if rd was PC and load instruction (and hence PC has changed)
	if(rd == 15 && isLoad)
	{
		emitInterruptCheck(address, FALSE);
		emitV(irLEAVE, leaveDynamicPC, fALL, fNONE);
		return FALSE;
	}

	return TRUE;
}

//
// Translate Single Data Transfer with Register-Specified Post-Indexing (therefore implied writeback)
//

BOOL CProfiler::translateSingleDTRegOffsetPostIndex(uint32 address, uint32 instruction)
{
	// Emits the armlets for a single data transfer (load/store, word/byte)
	// that accesses memory at rn and afterwards adds/subtracts a shifted
	// register offset to/from rn (post indexing, writeback is implied).
	// Bit 21 of a post indexed transfer is the T bit: irCLEARTRANS is emitted
	// before the access and irSETTRANS (unless in USR_MODE) after it.
	//
	// address     - ARM address of the instruction being translated
	// instruction - the instruction word
	//
	// Returns FALSE for the undefined-instruction encoding (bit 4 set) and
	// when rd is r15 on a load (leaveDynamicPC has been emitted), else TRUE.

	// if bit 4 set then it's an undefined instruction not a single data transfer
	if( getBit(instruction, 4) )
	{
		// ??? throw undefined instruction exception
		TRACE("ERROR - UNDEF INSTRUCTION EXCEPTION TRANSLATED IN translateSingleDTRegOffsetPostIndex() \n");
		return FALSE;
	}

	// decode the control bits once
	BOOL isLoad		= getBit(instruction, 20);
	BOOL tBit		= getBit(instruction, 21);
	BOOL byteAccess	= getBit(instruction, 22);
	BOOL addOffset	= getBit(instruction, 23);

	// if writeback flag (bit 21) is set in a post-indexed instruction
	// then signifies T bit being set (since writeback is implicit in post index)
	if(tBit)
	{
		emit(irCLEARTRANS, fNONE, fNONE);
	}

	// get base address reg
	uint8 rn = getField(instruction, 16, 19);
	// get register to load to/store from
	uint8 rd = getField(instruction, 12, 15);

	uint8 successFlag = useNextTemp();		// temp for returning RAM access success

	// test rn for 26 bit address limit
	uint8 addressTest = useNextTemp();
	emit1(irMOVC, addressTest, 0xFC000000, fNONE, fNONE);
	uint8 addressTestResult = useNextTemp();
	emit3(irAND, addressTestResult, rn, addressTest, fNONE, fNONE);
	emit1(irMOVC, addressTest, 0, fNONE, fNONE);
	emit2(irCMP, addressTestResult, addressTest, fNONE, fNONE);
	// if address is ok then skip the leave sequence below
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
	emitV(irLEAVE, leaveAddressException, fALL, fNONE);

	// emit the memory access itself
	if(isLoad)
	{
		// if byte else word
		if(byteAccess)
			emit3(irLDB, rd, rn, successFlag, fNONE, fNONE);
		else
			emit3(irLDW, rd, rn, successFlag, fNONE, fNONE);
	}
	else
	{
		// storing r15: fetch the PC (address + 12) through irGETPC first
		if(rd == 15)
		{
			emit1(irGETPC, vPC, address + 12, fALL, fNONE);
			// a byte store only needs the bottom 8 bits
			if(byteAccess)
			{
				uint8 tempByte = useNextTemp();
				emit1(irMOVC, tempByte, 0xff, fNONE, fNONE);
				emit3(irAND, vPC, vPC, tempByte, fNONE, fNONE);
			}
		}

		// if byte else word
		if(byteAccess)
			emit3(irSTB, rd, rn, successFlag, fNONE, fNONE);
		else
			emit3(irSTW, rd, rn, successFlag, fNONE, fNONE);
	}

	// 0 = successful access
	// if NOT successful then jump past the base register update; the
	// destination is only known later so the goto is backpatched
	uint8 accessTest = useNextTemp();
	emit1(irMOVC, accessTest, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, accessTest, fNONE, fNONE);
	emitV(irGOTONE, 0xffffffff, fNONE, fNONE);	// to be backpatched
	Armlet* unsuccessfulGoto = lastArmlet;		// store ref to the goto

	// if access was successful, continue with updating base address...

	// shift on offset reg is same as for data processing instruction
	// except the shift amount must be immediate (bits 7-11)

	// get register offset (unshifted)
	uint8 offsetReg = getField(instruction, 0, 3);
	// if rm is r15 (we need to fetch PSR and statically determine PC)
	if(offsetReg == 15)
		emit1(irGETPC, vPC, address + 8, fALL, fNONE);
	// get immediate amount to shift by
	uint8 shiftAmount = getField(instruction, 7, 11);
	// var to hold final offset value
	uint8 offset = useNextTemp();

	// deal with special cases where shift amount is 0 e.g. LSR#32, RRX
	if(shiftAmount == 0)
	{
		switch( getField(instruction, 5, 6) )
		{
			// LSL #0 leaves the register untouched, use it directly
			case LSL : offset = offsetReg; break;
			// represents LSR #32
			case LSR : emit1(irMOVC, offset, 0, fNONE, fNONE); break;
			// represents ASR #32, which gives the same value as ASR #31
			case ASR :
			{
				// the shift amount operand of irASR is a variable (see the
				// shiftAmount > 0 case below), so the constant has to be
				// loaded into a temp - the original passed the literal 31
				// as if it were a variable number
				uint8 asrAmount = useNextTemp();
				emit1(irMOVC, asrAmount, 31, fNONE, fNONE);
				emit3(irASR, offset, offsetReg, asrAmount, fNONE, fNONE);
				break;
			}
			// encoding for ROR #0 denotes RRX (reads the carry flag)
			case ROR : emit2(irRRX, offset, offsetReg, fC, fNONE); break;
		}
	}
	else
	{
		// shiftAmount > 0
		uint8 tempVar = useNextTemp();
		// irMOVC loads a constant so it must go through emit1 (the original
		// used emit2 here, the two-variable form, unlike every other irMOVC)
		emit1(irMOVC, tempVar, shiftAmount, fNONE, fNONE);
		switch( getField(instruction, 5, 6) )
		{
			case LSL : emit3(irLSL, offset, offsetReg, tempVar, fNONE, fNONE); break;
			case LSR : emit3(irLSR, offset, offsetReg, tempVar, fNONE, fNONE); break;
			case ASR : emit3(irASR, offset, offsetReg, tempVar, fNONE, fNONE); break;
			case ROR : emit3(irROR, offset, offsetReg, tempVar, fNONE, fNONE); break;
		}
	}

	// update base address (only reached if mem access went ok)
	// note, rn must not be r15 so don't worry about it (see ARM610 datasheet p38)
	if(addOffset)
		emit3(irADD, rn, rn, offset, fNONE, fNONE);
	else
		emit3(irSUB, rn, rn, offset, fNONE, fNONE);

	// if access was unsuccessful continues here (for store with T bit)...

	// undo the irCLEARTRANS emitted at the top: emit irSETTRANS, skipped
	// when the current mode is USR_MODE
	if(tBit)
	{
		uint8 tempVar = useNextTemp();
		emit1(irMOVC, tempVar, USR_MODE, fNONE, fNONE);
		emit2(irCMP, tempVar, vMODE, fNONE, fNONE);
		emitV(irGOTOEQ, armletCounter + 3, fNONE, fNONE); // skip irSETTRANS if in user mode
		emit(irSETTRANS, fNONE, fNONE);
	}

	// backpatch goto for unsuccessful memory access
	if( isLoad || !tBit )
	{
		// if load or if T bit not set
		// T handling only done if load was successful so set goto to after T handling
		unsuccessfulGoto->value = armletCounter + 1;
	}
	else
	{
		// if store and T bit set
		// T handling done whether store was successful or not so set goto to
		// before T handling (the 4 armlets emitted just above)
		unsuccessfulGoto->value = armletCounter - 3;
	}

	// if access was unsuccessful continues here (for load)...

	// check for unsuccessful access
	// if mem access was unsuccessful then C function will have triggered data
	// abort or address exception, so now we have to leave, returning the success variable
	uint8 testValue = useNextTemp();
	emit1(irMOVC, testValue, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, testValue, fNONE, fNONE);
	// successFlag == 0 means success (same contract as the check after the
	// access above), so the leave must be skipped on EQ. The original emitted
	// irGOTONE here, which skipped the leave on failure and took it on success.
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);	// if successful then skip this leave bit
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
	emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);

	// if rd was PC and load instruction (and hence PC has changed)
	if(rd == 15 && isLoad)
	{
		emitInterruptCheck(address, FALSE);
		emitV(irLEAVE, leaveDynamicPC, fALL, fNONE);
		return FALSE;
	}

	return TRUE;
}

//
// Translate Single Data Transfer with Register-Specified Pre-Indexing
//

BOOL CProfiler::translateSingleDTRegOffsetPreIndex(uint32 address, uint32 instruction)
{
	// Emits the armlets for a single data transfer (load/store, word/byte)
	// whose shifted register offset is applied to the base register BEFORE
	// the memory access, with optional writeback of the computed address.
	//
	// address     - ARM address of the instruction being translated
	// instruction - the instruction word
	//
	// Returns FALSE for the undefined-instruction encoding (bit 4 set) and
	// when rd is r15 on a load (leaveDynamicPC has been emitted), else TRUE.

	// if bit 4 set then it's an undefined instruction not a single data transfer
	if( getBit(instruction, 4) )
	{
		// ??? throw undefined instruction exception
		TRACE("ERROR - UNDEF INSTRUCTION EXCEPTION TRANSLATED IN translateSingleDTRegOffsetPreIndex() \n");
		return FALSE;
	}

	// decode the control bits once
	BOOL isLoad		= getBit(instruction, 20);
	BOOL writeBack	= getBit(instruction, 21);
	BOOL byteAccess	= getBit(instruction, 22);
	BOOL addOffset	= getBit(instruction, 23);

	// get base address reg
	uint8 rn = getField(instruction, 16, 19);
	// get register to load to/store from
	uint8 rd = getField(instruction, 12, 15);

	uint8 successFlag = useNextTemp();		// temp for returning RAM access success

	// get register holding offset
	uint8 offsetReg = getField(instruction, 0, 3);

	// if rm is r15 fetch the PC (address + 8) through irGETPC
	if(offsetReg == 15)
		emit1(irGETPC, vPC, address + 8, fALL, fNONE);
	// get immediate amount to shift by
	uint8 shiftAmount = getField(instruction, 7, 11);
	// var to hold final offset value
	uint8 offset = useNextTemp();

	// deal with special cases where shift amount is 0 e.g. LSR#32, RRX
	if(shiftAmount == 0)
	{
		switch( getField(instruction, 5, 6) )
		{
			// LSL #0 leaves the register untouched, use it directly
			case LSL : offset = offsetReg; break;
			// represents LSR #32
			case LSR : emit1(irMOVC, offset, 0, fNONE, fNONE); break;
			// represents ASR #32, which gives the same value as ASR #31
			case ASR :
			{
				// the shift amount operand of irASR is a variable (see the
				// shiftAmount > 0 case below), so the constant has to be
				// loaded into a temp - the original passed the literal 31
				// as if it were a variable number
				uint8 asrAmount = useNextTemp();
				emit1(irMOVC, asrAmount, 31, fNONE, fNONE);
				emit3(irASR, offset, offsetReg, asrAmount, fNONE, fNONE);
				break;
			}
			// encoding for ROR #0 denotes RRX; RRX rotates the carry flag in,
			// so declare fC as an input flag exactly as
			// translateSingleDTRegOffsetPostIndex does (was fNONE here)
			case ROR : emit2(irRRX, offset, offsetReg, fC, fNONE); break;
		}
	}
	else
	{
		// shiftAmount > 0
		uint8 tempVar = useNextTemp();
		emit1(irMOVC, tempVar, shiftAmount, fNONE, fNONE);
		switch( getField(instruction, 5, 6) )
		{
			case LSL : emit3(irLSL, offset, offsetReg, tempVar, fNONE, fNONE); break;
			case LSR : emit3(irLSR, offset, offsetReg, tempVar, fNONE, fNONE); break;
			case ASR : emit3(irASR, offset, offsetReg, tempVar, fNONE, fNONE); break;
			case ROR : emit3(irROR, offset, offsetReg, tempVar, fNONE, fNONE); break;
		}
	}

	// get variable to put address to access into
	uint8 addressVar = useNextTemp();

	// pick the variable holding the base address
	uint8 baseRegister = rn;
	if(rn == 15)
	{
		// can't use vPC as the base since vPC could already contain
		// the offset register value (fetched by irGETPC above)
		uint8 tempPC = useNextTemp();
		// the PC value is known statically
		emit1(irMOVC, tempPC, address + 8, fNONE, fNONE);
		baseRegister = tempPC;
	}

	// addressVar = base +/- offset
	if(addOffset)
		emit3(irADD, addressVar, baseRegister, offset, fNONE, fNONE);
	else
		emit3(irSUB, addressVar, baseRegister, offset, fNONE, fNONE);

	// test addressVar for 26 bit address limit
	uint8 addressTest = useNextTemp();
	emit1(irMOVC, addressTest, 0xFC000000, fNONE, fNONE);
	uint8 addressTestResult = useNextTemp();
	emit3(irAND, addressTestResult, addressVar, addressTest, fNONE, fNONE);
	emit1(irMOVC, addressTest, 0, fNONE, fNONE);
	emit2(irCMP, addressTestResult, addressTest, fNONE, fNONE);
	// if address is ok then skip the leave sequence below
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
	emitV(irLEAVE, leaveAddressException, fALL, fNONE);

	// emit the memory access itself
	if(isLoad)
	{
		// if byte else word
		if(byteAccess)
			emit3(irLDB, rd, addressVar, successFlag, fNONE, fNONE);
		else
			emit3(irLDW, rd, addressVar, successFlag, fNONE, fNONE);
	}
	else
	{
		// storing r15: fetch the PC (address + 12) through irGETPC first
		if(rd == 15)
		{
			emit1(irGETPC, vPC, address + 12, fALL, fNONE);
			// a byte store only needs the bottom 8 bits
			if(byteAccess)
			{
				uint8 tempByte = useNextTemp();
				emit1(irMOVC, tempByte, 0xff, fNONE, fNONE);
				emit3(irAND, vPC, vPC, tempByte, fNONE, fNONE);
			}
		}

		// if byte else word
		if(byteAccess)
			emit3(irSTB, rd, addressVar, successFlag, fNONE, fNONE);
		else
			emit3(irSTW, rd, addressVar, successFlag, fNONE, fNONE);
	}

	// 0 = successful access
	// if successful then jump over the data abort leave
	uint8 testValue = useNextTemp();
	emit1(irMOVC, testValue, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, testValue, fNONE, fNONE);
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);		// skip leave if successful

	// leaving code since access unsuccessful
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
	emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);

	// successful, so...

	// check for writeback
	// (addressVar is a fresh temp in this function; the inequality test is
	// kept from the original as a cheap guard against a useless move)
	if(writeBack && rn != addressVar)
	{
		// don't worry about rn==r15 since according to ARM610 datasheet
		// "write-back shall not be specified if r15 is specified as the base register"
		emit3(irMOV, rn, addressVar, vUNUSED, fNONE, fNONE);
	}

	// if rd was PC and load instruction (and hence PC has changed)
	if(rd == 15 && isLoad)
	{
		emitInterruptCheck(address, FALSE);
		emitV(irLEAVE, leaveDynamicPC, fALL, fNONE);
		return FALSE;
	}

	return TRUE;
}

//
// Translate both pre and post indexed block data transfers
//

BOOL CProfiler::translateBlockDataTransfer(uint32 address, uint32 instruction)
{
	// Emits the armlets for LDM/STM (pre and post indexed, increment and
	// decrement). One word access is emitted per register in the list, lowest
	// register first at the lowest address, each followed by a success test
	// and a data abort leave sequence. r15 is handled last and separately.
	//
	// address     - ARM address of the instruction being translated
	// instruction - the instruction word
	//
	// Returns FALSE when r15 is loaded (leaveDynamicPC has been emitted),
	// otherwise TRUE.

	uint8 rn = getField(instruction, 16, 19);				// get base address
	// note, according to ARM610 datasheet, rn cannot be r15
	uint32 registerList = getField(instruction, 0, 15);		// get list of regs to store

	// decode settings from instruction
	BOOL loadWord = getBit(instruction, 20);
	BOOL writeBack = getBit(instruction, 21);
	BOOL sFlag = getBit(instruction, 22);
	BOOL increment = getBit(instruction, 23);
	BOOL postIndexed = !getBit(instruction, 24);

	Armlet* patchGoto;	// ptr for gotos that need backpatching

	// test initial address in rn is valid (26 bit limit)
	uint8 validTest = useNextTemp();
	emit1(irMOVC, validTest, 0xFC000000, fNONE, fNONE);
	uint8 validTestResult = useNextTemp();
	emit3(irAND, validTestResult, rn, validTest, fNONE, fNONE);
	emit1(irMOVC, validTest, 0, fNONE, fNONE);
	emit2(irCMP, validTestResult, validTest, fNONE, fNONE);

	// if address is ok then skip leave
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
	emitV(irLEAVE, leaveAddressException, fALL, fNONE);

	// note, lowest reg always gets put in the lowest address (whether inc/dec)
	
	// if we're NOT in user mode and S flag is set and r15 is not to be stored
	// then need to use user mode registers (not the current mode)
	// we do not want to do mode swapping in recompiled code as it is too complex
	// so drop back to dispatcher
	// NOTE(review): the irGOTONE below skips the leave when the mode is NOT
	// USR_MODE, i.e. the leave is taken in user mode, which looks like the
	// opposite of what the paragraph above asks for - confirm intended polarity
	if(sFlag && !getBit(instruction, 15))
	{
		uint8 modeVar = useNextTemp();
		emit1(irMOVC, modeVar, USR_MODE, fNONE, fNONE);
		emit2(irCMP, vMODE, modeVar, fNONE, fNONE);
		emitV(irGOTONE, armletCounter + 9, fNONE, fNONE); // skip leave if not in user mode
		emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
		emitInterruptCheck(address, FALSE); // the + 9 above relies on this being 5 armlets
		emitV(irLEAVE, leaveBlockDTUser, fALL, fNONE);
	}

	// if it's LDM then set rn to final address here at the start
	// NOTE(review): currentAddress below is derived from rn AFTER this
	// update, so for LDM with writeback the transfer appears to start from
	// the already-adjusted base - verify against the interpreter
	if(loadWord)
	{
		if(writeBack)
		{
			// this is ok for both post and pre indexed
			if(increment)
			{
				// increment
				uint8 tempVar = useNextTemp();
				emit1(irMOVC, tempVar, (countSetBits(registerList) << 2), fNONE, fNONE );
				emit3(irADD, rn, rn, tempVar, fNONE, fNONE);
			}
			else
			{
				// decrement
				uint8 tempVar = useNextTemp();
				emit1(irMOVC, tempVar, (countSetBits(registerList) << 2), fNONE, fNONE );
				emit3(irSUB, rn, rn, tempVar, fNONE, fNONE);
			}
		}
		else
		{
			// if it's not writeback then rn is left as rn so do nothing
		}		
	}

	// set up address to start access (always the lowest address touched,
	// since the registers are then transferred at ascending addresses)
	uint8 currentAddress = useNextTemp();
	if(postIndexed)
	{
		if(increment)
		{
			emit3(irMOV, currentAddress, rn, vUNUSED, fNONE, fNONE);
		}
		else
		{
			// decrement
			// -4 is to take account of the +4 that is done in interpreter
			emit1(irMOVC, currentAddress, (countSetBits(registerList) << 2) - 4, fNONE, fNONE );
			emit3(irSUB, currentAddress, rn, currentAddress, fNONE, fNONE);
		}
	}
	else
	{
		if(increment)
		{
			// add 4 to the start address
			emit1(irMOVC, currentAddress, 4, fNONE, fNONE);
			emit3(irADD, currentAddress, currentAddress, rn, fNONE, fNONE);
		}
		else
		{
			// decrement
			// no -4 needed here
			emit1(irMOVC, currentAddress, (countSetBits(registerList) << 2), fNONE, fNONE );
			emit3(irSUB, currentAddress, rn, currentAddress, fNONE, fNONE);
		}
	}
	// note, the above decision could have been split in two and everything
	// else duplicated, since pre/post indexing is already decoded, however
	// this would make the code more susceptible to bugs (since any fixes would
	// likely need to be duplicated also)

	// NOTE(review): every success test in this function is CMP successFlag,0
	// followed by irGOTONE over the data abort sequence ("if successful jump"),
	// whereas the single data transfer translators document 0 = success and
	// skip their abort sequence with irGOTOEQ - one of the two is inverted,
	// confirm the irLDW/irSTW success flag contract

	// find first reg to be done (r0-r14 only, r15 is handled at the end)
	int index;
	for(index=0; index<15; index++)
	{
		if( getBit(instruction, index) )
		{
			// generate code to do this reg
			uint8 successFlag = useNextTemp();
			if(loadWord)
				emit3(irLDW, index, currentAddress, successFlag, fNONE, fNONE);
			else
				emit3(irSTW, index, currentAddress, successFlag, fNONE, fNONE);
			uint8 testVar = useNextTemp();
			emit1(irMOVC, testVar, 0, fNONE, fNONE);
			emit2(irCMP, successFlag, testVar, fNONE, fNONE);
			emitV(irGOTONE, 0xffffffff, fNONE, fNONE);			// to be backpatched
			patchGoto = lastArmlet;				// preserve this for backpatching
			// if successful jump this bit

			// condition for S flag instructions that writeback must be
			// set for this to happen
			if(!sFlag || writeBack)
			{
				// set rn to finalAddress
				
				// calculate final address
				if(writeBack)
				{
					// again, ok for pre and post indexed
					if(increment)
					{
						uint8 bytesUsed = useNextTemp();
						emit1(irMOVC, bytesUsed, countSetBits(registerList) << 2, fNONE, fNONE);
						emit3(irADD, rn, rn, bytesUsed, fNONE, fNONE);
					}
					else
					{
						// at this stage currentAddress is what rn should be set to (+4) so
						uint8 tempVar = useNextTemp();
						emit1(irMOVC, tempVar, 4, fNONE, fNONE);
						emit3(irSUB, rn, currentAddress, tempVar, fNONE, fNONE);
					}
				}
				else
				{
					// for non-writeback instructions final address is the same as the old rn
					// (for both increment and decrement) so do nothing
				}
			}

			// data abort
			emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
			emitInterruptCheck(address, FALSE);
			emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);

			// backpatch goto since jumping to the instruction after last one emitted
			patchGoto->value = armletCounter + 1;

			// if successful adjust increment address (whether inc or dec data transfer)
			uint8 incValue = useNextTemp();
			emit1(irMOVC, incValue, 4, fNONE, fNONE);
			emit3(irADD, currentAddress, currentAddress, incValue, fNONE, fNONE);

			break;	// we've found 1st reg so leave loop
		}
	}
			
	// for all remaining regs but NOT r15
	index++;
	for( ; index<15; index++)
	{
		if( getBit(instruction, index) )
		{
			// generate code to do this reg
			uint8 successFlag = useNextTemp();
			if(loadWord)
				emit3(irLDW, index, currentAddress, successFlag, fNONE, fNONE);
			else
				emit3(irSTW, index, currentAddress, successFlag, fNONE, fNONE);
			uint8 testVar = useNextTemp();
			emit1(irMOVC, testVar, 0, fNONE, fNONE);
			emit2(irCMP, successFlag, testVar, fNONE, fNONE);
			emitV(irGOTONE, 0xffffffff, fNONE, fNONE);		// leave for backpatching
			patchGoto = lastArmlet;			// preserve this for backpatching
		
			// if successful then skip this bit
		
			// if LDM then update rn with final address
			// (STMs have already done the writeback so only do this for LDM)
			if(loadWord)
			{
				if(sFlag && writeBack)
				{
					// LDM with S flag, only updates rn if writeback set

					uint8 bytesUsed = useNextTemp();
					emit1(irMOVC, bytesUsed, countSetBits(registerList) << 2, fNONE, fNONE);
					if(increment)
					{
						emit3(irADD, rn, rn, bytesUsed, fNONE, fNONE);
					}
					else
					{
						// decrement
						emit3(irSUB, rn, rn, bytesUsed, fNONE, fNONE);
					}
				}
				
				// note, LDM without S flag would update rn with its own value
				// for both inc and dec cases, so don't bother
			}

			// data abort
			emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
			emitInterruptCheck(address, FALSE);
			emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);

			// backpatch goto since jumping to the instruction after this one
			patchGoto->value = armletCounter + 1;

			// if successful increment current address (for both inc and dec data transfer)
			uint8 incValue = useNextTemp();
			emit1(irMOVC, incValue, 4, fNONE, fNONE);
			emit3(irADD, currentAddress, currentAddress, incValue, fNONE, fNONE);
			
			// don't leave loop with break since we want to do this for
			// all regs in the list

		}
	}

	// if r15 is in the list
	if( getBit(instruction, 15) )
	{
		// register number of the PC. The original reused the loop variable
		// "index" here, but that is 16 (not 15) when r15 is the ONLY register
		// in the list: the first loop then ends with index == 15 and the
		// index++ before the second loop pushes it past r15.
		const uint8 pcRegister = 15;

		if(!loadWord)
		{
			// if store then get PSR and pipelining
			emit1(irGETPC, vPC, address + 12, fALL, fNONE);
		}

		// generate code to do r15
		uint8 successFlag = useNextTemp();
		if(loadWord)
			emit3(irLDW, pcRegister, currentAddress, successFlag, fNONE, fNONE);
		else
			emit3(irSTW, pcRegister, currentAddress, successFlag, fNONE, fNONE);
		uint8 testVar = useNextTemp();
		emit1(irMOVC, testVar, 0, fNONE, fNONE);
		emit2(irCMP, successFlag, testVar, fNONE, fNONE);
		emitV(irGOTONE, 0xffffffff, fNONE, fNONE);			// to be backpatched
		patchGoto = lastArmlet;					// preserve this for backpatching

		// data abort
		emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
		emitInterruptCheck(address, FALSE);
		emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);

		// backpatch goto since jumping to the instruction after this one
		patchGoto->value = armletCounter + 1;

		if(loadWord)
		{
			// if load and S then update PSR with data (irSETPC)
			if(sFlag)
				emit(irSETPC, fALL, fNONE);
			
			emitInterruptCheck(address, FALSE);

			// end chunk on dynamic PC
			emitV(irLEAVE, leaveDynamicPC, fALL, fNONE);

			return FALSE;
		}
	}

	return TRUE;
}

//
// Translate Branch
//

BOOL CProfiler::translateBranch(uint32 address, uint32 instruction)
{
	// Emits the armlets for a branch. A backward branch landing at or after
	// startAddress becomes an internal goto; every other branch sets vPC to
	// the destination and leaves. Always returns FALSE.

	// 24 bit word offset from the instruction, relative to address + 8,
	// wrapped to the 26 bit addressable space
	const uint32 wordOffset = getField(instruction, 0, 23);
	const uint32 destination = ((address + 8) + (wordOffset << 2)) & 0x3FFFFFF;

	const bool branchesForward = (destination > address);

	// backward (or to itself) but still inside the chunk being recompiled:
	// no need to update PC or to issue a leave armlet, just goto the
	// armlet of the destination
	if(!branchesForward && destination >= startAddress)
	{
		translateBranchInternal(destination, irGOTO);
		return FALSE;	// still stop recompilation
	}

	// destination is outside the chunk: forward, or before the chunk start
	uint32 leaveReason = leaveBranchBackward;
	if(branchesForward)
		leaveReason = leaveBranchForward;

	// update PC, check for interrupts, then leave
	emit1(irMOVC, vPC, destination, fNONE, fNONE);
	emitInterruptCheck(address, FALSE);
	emitV(irLEAVE, leaveReason, fALL, fNONE);

	// branch will leave chunk
	return FALSE;
}

//
// Translate a branch that's entirely internal to the chunk
// destination is the ARM address of the branch destination
//

void CProfiler::translateBranchInternal(uint32 destination, uint8 gotoOpcode)
{
	// Emits an interrupt check followed by a goto (gotoOpcode) to the armlet
	// that addressToArmletNumber records for the ARM address "destination".
	//
	// If that armlet lies inside a conditional execution block (after the
	// block's own GOTOcc), an extra goto with the inverted condition is
	// inserted at the destination so the condition is rechecked on entry.
	// The insertion shifts later armlets by one, so goto targets, the armlet
	// counter, the address map and the conditional block list are patched up.
	//
	// destination - ARM address of the branch destination
	// gotoOpcode  - goto armlet opcode to emit

	emitInterruptCheck(destination, TRUE);

	// get number of destination armlet
	uint32 armletDestination = addressToArmletNumber[destination];
	emitV(gotoOpcode, armletDestination, getCondFlags(gotoOpcode - irGOTOEQ), fNONE);

	// check boundaries of conditional blocks
	LinkedListElement* conditionalBlockEntry = conditionalBlockList->getPointerToHead();
	while(conditionalBlockEntry != NULL)
	{
		// access to data in linked list element
		ConditionalBlockInfo* conditionalBlock = (ConditionalBlockInfo*)conditionalBlockEntry->payload;

		// ??? debugging
		//TRACE("conditionalBlock->startArmlet=0x%x conditionalBlock->endArmlet=0x%x armletDestination=0x%x \n", conditionalBlock->startArmlet, conditionalBlock->endArmlet, armletDestination);

		// does this GOTO point to after the conditional block's GOTOcc and
		// to the last armlet in the conditional block (or before)?
		if(conditionalBlock->startArmlet < armletDestination &&
		   conditionalBlock->endArmlet >= armletDestination)
		{
			// then this branch goes to middle of conditional block
			// so insert goto at armletDestination to endArmlet+1 of conditional block

			// create goto armlet with the inverse of the block's condition
			uint8 opcode = irGOTOEQ + (invertConditionCode(conditionalBlock->conditionCode) - ccEQ);
			Armlet* gotoArmlet = getEmit(opcode, (conditionalBlock->endArmlet)+1, vUNUSED, getCondFlags(opcode - irGOTOEQ), fNONE);

			// insert goto armlet into LL at armletDestination
			armletList->insert(armletDestination, gotoArmlet);

			// update all gotos before AND after the inserted one (this walk
			// includes the inserted goto itself, whose target has moved too)
			LinkedListElement* ptr = armletList->getPointerToHead();
			while(ptr != NULL)
			{
				// get armlet
				Armlet* ptrArmlet = (Armlet*)ptr->payload;

				// ??? debugging
				//TRACE("ptrArmlet->opcode=0x%x irGOTOEQ=0x%x irGOTONV=0x%x ptrArmlet->value=0x%x armletDestination=0x%x \n", ptrArmlet->opcode, irGOTOEQ, irGOTONV, ptrArmlet->value, armletDestination);

				// only goto armlets carry an armlet number in value
				if(ptrArmlet->opcode >= irGOTOEQ && ptrArmlet->opcode <= irGOTONV)
				{
					// if this goto jumps to after the inserted one then increment it
					// (gotos to armletDestination itself now land on the inserted goto)
					if(ptrArmlet->value > armletDestination)
						ptrArmlet->value++;
				}
				ptr = ptr->nextElement;
			}

			// update armlet counter
			armletCounter++;

			// update addressToArmletNumber entries for after the goto
			// (guarded loop: the original do/while dereferenced begin()
			// before comparing it with end())
			std::map<uint32, uint32>::iterator addrIterator = addressToArmletNumber.begin();
			while( addrIterator != addressToArmletNumber.end() )
			{
				// if the armlet number is > inserted goto then increment it
				if( (*addrIterator).second > armletDestination )
					((*addrIterator).second)++;
				++addrIterator; // next entry
			}

			// update ConditionalBlockInfo list
			// create new entry (for second half of old block)
			// NOTE(review): other conditional blocks located after the
			// insertion point are not renumbered here - check whether their
			// startArmlet/endArmlet need the same +1
			ConditionalBlockInfo* newBlock = new ConditionalBlockInfo;
			conditionalBlockList->addToTail(newBlock);
			newBlock->conditionCode = conditionalBlock->conditionCode;
			newBlock->startArmlet = armletDestination;
			newBlock->endArmlet = conditionalBlock->endArmlet + 1;
			// update old entry with new end (first half of old block)
			conditionalBlock->endArmlet = armletDestination - 1;

			// leave loop, since an armlet cannot be in two conditional blocks
			break;
		}

		// check next conditional block
		conditionalBlockEntry = conditionalBlockEntry->nextElement;
	} // end while
}

//
// Translate Branch with Link
//

BOOL CProfiler::translateBranchWithLink(uint32 address, uint32 instruction)
{
	// BL = fill r14 through irGETPC, then behave exactly like a plain branch.
	// Returns whatever translateBranch returns.

	// r14 is given address + 4 (not the address + 8 used for PC elsewhere)
	const uint32 linkValue = address + 4;
	emit1(irGETPC, vR14, linkValue, fALL, fNONE);

	// the branch part is shared with B
	const BOOL branchResult = translateBranch(address, instruction);
	return branchResult;
}

//
//
//

// Coprocessor data transfer (pre-indexed): no armlets are generated for the
// operation itself, instead a LEAVE armlet tagged leaveCoProDTPreIndex is
// emitted.
//
// address     - address of the instruction being translated
// instruction - raw instruction word (not examined here)
// returns     - always TRUE
BOOL CProfiler::translateCoProDTPreIndex(uint32 address, uint32 instruction)
{
	// load vPC with the pipelined PC (address+8) before leaving
	const uint32 pipelinedPC = address + 8;
	emit1(irMOVC, vPC, pipelinedPC, fNONE, fNONE);

	// vPC has just been set above, so the check must not update it again;
	// with updatePC == FALSE emitInterruptCheck emits 5 armlets
	emitInterruptCheck(address, FALSE);

	emitV(irLEAVE, leaveCoProDTPreIndex, fALL, fNONE);

	return TRUE;
}

//
//
//

// Coprocessor data transfer (post-indexed): no armlets are generated for the
// operation itself, instead a LEAVE armlet tagged leaveCoProDTPostIndex is
// emitted.
//
// address     - address of the instruction being translated
// instruction - raw instruction word (not examined here)
// returns     - always TRUE
BOOL CProfiler::translateCoProDTPostIndex(uint32 address, uint32 instruction)
{
	// load vPC with the pipelined PC (address+8) before leaving
	const uint32 pipelinedPC = address + 8;
	emit1(irMOVC, vPC, pipelinedPC, fNONE, fNONE);

	// vPC has just been set above, so the check must not update it again;
	// with updatePC == FALSE emitInterruptCheck emits 5 armlets
	emitInterruptCheck(address, FALSE);

	emitV(irLEAVE, leaveCoProDTPostIndex, fALL, fNONE);

	return TRUE;
}

//
//
//

// Coprocessor register transfer / data operation: no armlets are generated
// for the operation itself, instead a LEAVE armlet tagged leaveCoProRTorDO
// is emitted.
//
// address     - address of the instruction being translated
// instruction - raw instruction word (not examined here)
// returns     - always TRUE
BOOL CProfiler::translateCoProRegTransferOrDataOperation(uint32 address, uint32 instruction)
{
	// load vPC with the pipelined PC (address+8) before leaving
	const uint32 pipelinedPC = address + 8;
	emit1(irMOVC, vPC, pipelinedPC, fNONE, fNONE);

	// vPC has just been set above, so the check must not update it again;
	// with updatePC == FALSE emitInterruptCheck emits 5 armlets
	emitInterruptCheck(address, FALSE);

	emitV(irLEAVE, leaveCoProRTorDO, fALL, fNONE);

	return TRUE;
}

//
//
//

// Translates SWI: the software interrupt is not expanded into armlets,
// instead a LEAVE armlet tagged leaveSwi is emitted.
//
// address     - address of the instruction being translated
// instruction - raw instruction word (not examined here)
// returns     - always FALSE
BOOL CProfiler::translateSoftwareInterrupt(uint32 address, uint32 instruction)
{
	// load vPC with the pipelined PC (address+8) before leaving
	const uint32 pipelinedPC = address + 8;
	emit1(irMOVC, vPC, pipelinedPC, fNONE, fNONE);

	// vPC has just been set above, so the check must not update it again;
	// with updatePC == FALSE emitInterruptCheck emits 5 armlets
	emitInterruptCheck(address, FALSE);

	// leave, signalling that a software interrupt has been raised
	emitV(irLEAVE, leaveSwi, fALL, fNONE);

	return FALSE;
}

//
//
//

// Translates MUL / MLA:  rd = (rm * rs) [+ rn when bit 21 is set]
//
// address     - address of the instruction being translated
// instruction - raw instruction word
// returns     - the result of checkRdForPC() for rd
//
// The ARM ARM calls r15 in any operand position unpredictable, which is NOT
// what the ARM3 datasheet says; the r15 handling below follows the original
// implementation's reading of the pipeline offsets.
BOOL CProfiler::translateMultiply(uint32 address, uint32 instruction)
{
	// register fields
	const uint8 rd = getField(instruction, 16, 19);
	const uint8 rs = getField(instruction, 8, 11);
	const uint8 rm = getField(instruction, 0, 3);

	// S bit (set flags) and A bit (accumulate)
	const BOOL sFlag = getBit(instruction, 20);
	const BOOL accumulate = getBit(instruction, 21);

	// flags written by the armlet that produces the final result:
	// N and Z when the S bit is set, nothing otherwise
	const uint8 resultFlags = static_cast<uint8>(sFlag ? (fN | fZ) : fNONE);

	// rs == r15 reads PC as address + 8
	if(rs == 15)
		emit1(irMOVC, vPC, address + 8, fNONE, fNONE);

	// rm == r15 reads PC|PSR as address + 12
	if(rm == 15)
		emit1(irGETPC, vPC, address + 12, fALL, fNONE);

	if(!accumulate)
	{
		// plain MUL, the multiply itself produces the final result
		emit3(irMUL, rd, rm, rs, fNONE, resultFlags);
	}
	else
	{
		// MLA
		const uint8 rn = getField(instruction, 12, 15);

		// rn == r15 reads PC|PSR as address + 8
		if(rn == 15)
			emit1(irGETPC, vPC, address + 8, fALL, fNONE);

		// when rd and rn are the same register the product must go to a
		// temp, otherwise it would overwrite the accumulate operand
		const uint8 product = (rd == rn) ? useNextTemp() : rd;

		// the multiply never sets flags here, only the add may
		emit3(irMUL, product, rm, rs, fNONE, fNONE);
		emit3(irADD, rd, product, rn, fNONE, resultFlags);
	}

	// rd should never be 15 anyway, but handle it to be on the safe side
	return checkRdForPC(rd, sFlag, address);
}

//
// Translates Single Data Swap
//

// Translates SWP / SWPB. The emitted armlets perform:
//     temp = [rn]
//     [rn] = rm
//     rd   = temp
// with a 26 bit address check up front and a success check after each
// memory access; every failing check reaches a LEAVE armlet.
//
// address     - address of the instruction being translated
// instruction - raw instruction word
// returns     - FALSE when rd is r15 (a LEAVE tagged leaveDynamicPC is
//               emitted), TRUE otherwise
//
// Each "armletCounter + 9" below skips the MOVC (1), the interrupt check
// (5 armlets with updatePC == FALSE) and the LEAVE (1) that follow the goto,
// so the number of armlets in those sequences must not change.
BOOL CProfiler::translateSingleDataSwap(uint32 address, uint32 instruction)
{
	BOOL bytes = getBit(instruction, 22);
	uint8 rn = getField(instruction, 16, 19);
	uint8 rd = getField(instruction, 12, 15);
	uint8 rm = getField(instruction, 0, 3);

	// allocate temp var to hold value during swapping
	uint8 tempVar = useNextTemp();
	// allocate success var to denote whether access successful
	uint8 successFlag = useNextTemp();

	// ??? note, this is not as accurate as the ARM3 datasheet suggests
	// because the code needed would be ridiculous, but not as
	// ridiculous as using r15 for rn in the first place!
	if(rn==15)
		emit1(irGETPC, vPC, address + 8, fALL, fNONE);

	// test rn for 26 bit address limit
	uint8 addressTest = useNextTemp();
	emit1(irMOVC, addressTest, 0xFC000000, fNONE, fNONE);
	uint8 addressTestResult = useNextTemp();
	emit3(irAND, addressTestResult, rn, addressTest, fNONE, fNONE);
	emit1(irMOVC, addressTest, 0, fNONE, fNONE);
	emit2(irCMP, addressTestResult, addressTest, fNONE, fNONE);
	// if address is ok then skip leave
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // represents 5 armlets
	emitV(irLEAVE, leaveAddressException, fALL, fNONE);

	// do temp = [rn]

	// if byte/word swap
	if(bytes)
	{
		// byte
		emit3(irLDB, tempVar, rn, successFlag, fNONE, fNONE);
	}
	else
	{
		// word
		emit3(irLDW, tempVar, rn, successFlag, fNONE, fNONE);
	}
	// setup test value, 0 is successful access
	uint8 testVar = useNextTemp();
	emit1(irMOVC, testVar, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, testVar, fNONE, fNONE);
	// if successful then skip the leave sequence
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // represents 5 armlets
	emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);


	// do [rn] = rm

	// scratch temp for the value to be stored: holds PC|PSR when rm is r15
	// and the masked byte for a byte swap
	uint8 sourceRegister = rm;
	uint8 storeTemp = useNextTemp();
	if(rm == 15)
	{
		emit1(irGETPC, storeTemp, address + 12, fALL, fNONE);
		sourceRegister = storeTemp;
	}
	// if byte/word swap
	if(bytes)
	{
		// byte
		// mask the source down to a byte; the result goes into the scratch
		// temp and NOT back into sourceRegister, because sourceRegister is
		// the guest register rm whenever rm != r15 and the swap must leave
		// rm unmodified
		uint8 andValue = useNextTemp();
		emit1(irMOVC, andValue, 0xff, fNONE, fNONE);
		emit3(irAND, storeTemp, sourceRegister, andValue, fNONE, fNONE);
		emit3(irSTB, storeTemp, rn, successFlag, fNONE, fNONE);
	}
	else
	{
		// word
		emit3(irSTW, sourceRegister, rn, successFlag, fNONE, fNONE);
	}
	// setup test value, 0 is successful access
	emit1(irMOVC, testVar, 0, fNONE, fNONE);
	emit2(irCMP, successFlag, testVar, fNONE, fNONE);
	// if successful then skip the leave sequence
	emitV(irGOTOEQ, armletCounter + 9, fNONE, fNONE);
	emit1(irMOVC, vPC, address + 8, fNONE, fNONE);
	emitInterruptCheck(address, FALSE); // represents 5 armlets
	emitV(irLEAVE, leaveDataAbortException, fALL, fNONE);


	// rd = temp
	
	emit3(irMOV, rd, tempVar, vUNUSED, fNONE, fNONE);
	if(rd == 15)
	{
		// PC has been loaded from memory, so the chunk has to be left
		emitInterruptCheck(address, FALSE); // represents 5 armlets
		emitV(irLEAVE, leaveDynamicPC, fALL, fNONE);
		return FALSE;
	}
	else
	{
		return TRUE;
	}
}

//
// return word from memory at specified address 
//

// Returns the word at the given address, read from the test memory when
// useTestMemory is set and from MEMC otherwise.
//
// address - address to read from
// returns - the word handed back by the memory object; 0 if the read left
//           the output untouched
uint32 CProfiler::readWord(uint32 address)
{
	// initialised so that a read which does not write its output argument
	// cannot make this function return an indeterminate value
	// NOTE(review): the status returned by the read calls (if any) is still
	// ignored, as before - confirm whether callers need to know about failures
	uint32 word = 0;

	if(useTestMemory)
		testMemory->readWord(address, word);
	else
		MEMC.readw(address, word);

	return word;
}

//
// emit an armlet with no operands e.g. ioclock
//

// Appends an operand-less armlet to the armlet list.
//
// opcode   - armlet opcode, expected in irFirstImplied..irLastImplied
// inflags  - stored in the armlet's inflags field
// outflags - stored in the armlet's outflags field; anything other than
//            fNONE also sets flagsAdjusted
//
// Side effects: lastArmlet points at the new armlet and armletCounter is
// incremented. The rd/rx/ry/value fields of the armlet are not assigned here.
void CProfiler::emit(uint8 opcode, uint8 inflags, uint8 outflags)
{
	// ??? debugging
	// (message previously named emitV, which sent whoever read the trace to
	// the wrong function)
	if(opcode < irFirstImplied || opcode > irLastImplied)
		TRACE("ERROR Incorrect opcode for emit: opcode = 0x%x \n", opcode);

	// create new armlet
	Armlet* newArmlet = new Armlet;
	newArmlet->opcode = opcode;
	newArmlet->inflags = inflags;
	newArmlet->outflags = outflags;
	newArmlet->leader = FALSE;

	// set details for recompiler
	if(outflags != fNONE)
		flagsAdjusted = TRUE;

	// add new armlet to armlet list
	armletList->addToTail(newArmlet);

	// update lastArmlet to point to this newly created armlet
	lastArmlet = newArmlet;
	// inc armlet counter
	armletCounter++;
}

//
// emit an armlet with one 32bit operand e.g. goto
//

// Appends an armlet carrying a single 32 bit value to the armlet list.
//
// opcode   - armlet opcode, expected in irFirstValue..irLastValue
// value    - stored in the armlet's value field
// inflags  - stored in the armlet's inflags field
// outflags - stored in the armlet's outflags field; anything other than
//            fNONE also sets flagsAdjusted
//
// Side effects: lastArmlet points at the new armlet and armletCounter is
// incremented.
void CProfiler::emitV(uint8 opcode, uint32 value, uint8 inflags, uint8 outflags)
{
	// ??? debugging: report opcodes that do not belong to this emitter
	const BOOL opcodeInRange = (opcode >= irFirstValue) && (opcode <= irLastValue);
	if(!opcodeInRange)
		TRACE("ERROR Incorrect opcode for emitV: opcode = 0x%x \n", opcode);

	// build the armlet
	Armlet* armlet = new Armlet;
	armlet->leader = FALSE;
	armlet->outflags = outflags;
	armlet->inflags = inflags;
	armlet->value = value;
	armlet->opcode = opcode;

	// let the recompiler know that flags get written
	if(fNONE != outflags)
		flagsAdjusted = TRUE;

	// append to the list and make it the most recent armlet
	armletList->addToTail(armlet);
	lastArmlet = armlet;

	++armletCounter;
}

//
// emit an armlet with rd and one 32 bit operand e.g. movc rd,0xFFFFFFFF
//

// Appends an armlet with a destination register and a 32 bit value to the
// armlet list.
//
// opcode   - armlet opcode, expected in irFirstOneVar..irLastOneVar
// rd       - stored in the armlet's rd field
// value    - stored in the armlet's value field
// inflags  - stored in the armlet's inflags field
// outflags - stored in the armlet's outflags field; anything other than
//            fNONE also sets flagsAdjusted
//
// Side effects: lastArmlet points at the new armlet and armletCounter is
// incremented.
void CProfiler::emit1(uint8 opcode, uint8 rd, uint32 value, uint8 inflags, uint8 outflags)
{
	// ??? debugging: report opcodes that do not belong to this emitter
	const BOOL opcodeInRange = (opcode >= irFirstOneVar) && (opcode <= irLastOneVar);
	if(!opcodeInRange)
		TRACE("ERROR Incorrect opcode for emit1: opcode = 0x%x \n", opcode);

	// build the armlet
	Armlet* armlet = new Armlet;
	armlet->leader = FALSE;
	armlet->outflags = outflags;
	armlet->inflags = inflags;
	armlet->value = value;
	armlet->rd = rd;
	armlet->opcode = opcode;

	// let the recompiler know that flags get written
	if(fNONE != outflags)
		flagsAdjusted = TRUE;

	// append to the list and make it the most recent armlet
	armletList->addToTail(armlet);
	lastArmlet = armlet;

	++armletCounter;
}

//
// emit an armlet with 2 operands (rx and ry) e.g. cmp rx,ry
// note, also used for MOV etc. take care!
//

// Appends an armlet with two register operands to the armlet list.
//
// opcode   - armlet opcode, expected in irFirstTwoVar..irLastTwoVar
// rx, ry   - stored in the armlet's rx and ry fields
// inflags  - stored in the armlet's inflags field
// outflags - stored in the armlet's outflags field; anything other than
//            fNONE also sets flagsAdjusted
//
// Side effects: lastArmlet points at the new armlet and armletCounter is
// incremented. The rd field of the armlet is not assigned here.
void CProfiler::emit2(uint8 opcode, uint8 rx, uint8 ry, uint8 inflags, uint8 outflags)
{
	// ??? debugging: report opcodes that do not belong to this emitter
	const BOOL opcodeInRange = (opcode >= irFirstTwoVar) && (opcode <= irLastTwoVar);
	if(!opcodeInRange)
		TRACE("ERROR Incorrect opcode for emit2: opcode = 0x%x \n", opcode);

	// build the armlet
	Armlet* armlet = new Armlet;
	armlet->leader = FALSE;
	armlet->outflags = outflags;
	armlet->inflags = inflags;
	armlet->ry = ry;
	armlet->rx = rx;
	armlet->opcode = opcode;

	// let the recompiler know that flags get written
	if(fNONE != outflags)
		flagsAdjusted = TRUE;

	// append to the list and make it the most recent armlet
	armletList->addToTail(armlet);
	lastArmlet = armlet;

	++armletCounter;
}

//
// emit an armlet with 3 operands (rd, rx and ry) e.g. add rd,rx,ry
//

// Appends an armlet with a destination and two source registers to the
// armlet list.
//
// opcode   - armlet opcode, expected in irFirstThreeVar..irLastThreeVar;
//            irMOV and irMVN are accepted as well
// rd       - stored in the armlet's rd field
// rx, ry   - stored in the armlet's rx and ry fields
// inflags  - stored in the armlet's inflags field
// outflags - stored in the armlet's outflags field; anything other than
//            fNONE also sets flagsAdjusted
//
// Side effects: lastArmlet points at the new armlet and armletCounter is
// incremented.
void CProfiler::emit3(uint8 opcode, uint8 rd, uint8 rx, uint8 ry, uint8 inflags, uint8 outflags)
{
	// ??? debugging: report opcodes that do not belong to this emitter,
	// MOV and MVN being the two permitted exceptions
	const BOOL threeVarOpcode = (opcode >= irFirstThreeVar) && (opcode <= irLastThreeVar);
	const BOOL moveOpcode = (opcode == irMOV) || (opcode == irMVN);
	if(!threeVarOpcode && !moveOpcode)
		TRACE("ERROR Incorrect opcode for emit3: opcode = 0x%x \n", opcode);

	// build the armlet
	Armlet* armlet = new Armlet;
	armlet->leader = FALSE;
	armlet->outflags = outflags;
	armlet->inflags = inflags;
	armlet->ry = ry;
	armlet->rx = rx;
	armlet->rd = rd;
	armlet->opcode = opcode;

	// let the recompiler know that flags get written
	if(fNONE != outflags)
		flagsAdjusted = TRUE;

	// append to the list and make it the most recent armlet
	armletList->addToTail(armlet);
	lastArmlet = armlet;

	++armletCounter;
}

//
// tests for bit pattern 1001 in bits 4-7 to determine the difference
// between a data processing instruction and a MUL or SWP instruction
//

// Returns TRUE when both bit 4 and bit 7 of the instruction are set.
//
// NOTE(review): only bits 4 and 7 are examined (mask 0x90); bits 5 and 6 are
// not required to be clear, so this is looser than a strict 1001 match on
// bits 4-7 - confirm that this is intended.
BOOL CProfiler::isExtendedInstruction(uint32 instruction)
{
	const uint32 extendedBits = 0x90;

	if((instruction & extendedBits) != extendedBits)
		return FALSE;

	return TRUE;
}


//
// returns flags required by this condition code's checking
//

// Maps a condition code to the set of flags it is evaluated from. A
// condition and its opposite always share the same set.
//
// conditionCode - one of the cc* condition codes
// returns       - combination of fZ / fC / fN / fV, or fNONE for ccAL, ccNV
//                 and unrecognised codes (the latter are also traced)
uint8 CProfiler::getCondFlags(uint8 conditionCode)
{
	switch(conditionCode)
	{
		case ccEQ:
		case ccNE:
			return fZ;

		case ccCS:
		case ccCC:
			return fC;

		case ccMI:
		case ccPL:
			return fN;

		case ccVS:
		case ccVC:
			return fV;

		case ccHI:
		case ccLS:
			return (fC | fZ);

		case ccGE:
		case ccLT:
			return (fN | fV);

		case ccGT:
		case ccLE:
			return (fZ | fN  | fV);

		case ccAL:
		case ccNV:
			return fNONE;
	}

	// not one of the sixteen known codes
	TRACE("ERROR, getCondFlags() switch has fallen through, conditioncode=0x%x \n", conditionCode);
	return fNONE;
}


//
// returns the value of the next temp variable to be used
//

// Hands out the next free temp variable and advances nextFreeTemp.
//
// returns - the temp that was free on entry; running past maxTemp is only
//           traced, the out of range value is still returned
uint8 CProfiler::useNextTemp()
{
	// take the current free temp and move on to the one after it
	const uint8 allocated = nextFreeTemp++;

	// warn when the supply of temps has been exhausted
	if(allocated > maxTemp)
		TRACE("ERROR attempting to use a temp variable out of available bounds toBeUsed=%d \n", allocated - vT0 );

	return allocated;
}

//
// reset available temps so that all are available for use
//

void CProfiler::resetAvailableTemps()
{
	// set next free temp variable back to T0 for new instruction
	nextFreeTemp = vT0;
}

//
// takes rd as an argument, if rd is PC then emit code to deal with
// leaving chunk and return FALSE, otherwise return TRUE
//

// Emits the chunk-leaving sequence when the destination register is the PC.
//
// rd      - destination register of the instruction just translated
// sFlag   - S bit of that instruction
// address - address of that instruction, passed on to the interrupt check
// returns - TRUE when rd is not r15 (nothing emitted), FALSE when it is
BOOL CProfiler::checkRdForPC(uint8 rd, BOOL sFlag, uint32 address)
{
	// any destination other than the PC needs no special handling
	if(rd != 15)
		return TRUE;

	// with the S bit set the PSR has been written along with the PC; in
	// privileged modes the mode and interrupt flags may have changed too,
	// so signal that before leaving
	if( sFlag )
		emit(irSETPC, fALL, fNONE);

	// the new PC was almost certainly determined dynamically (it makes no
	// sense to do MOV PC,#1234...), so the chunk has to be left either way
	emitInterruptCheck(address, FALSE); // represents 5 armlets
	emitV(irLEAVE, leaveDynamicPC, fALL, fNONE);

	return FALSE;
}

// Creates an armlet without adding it to the armlet list; lastArmlet and
// armletCounter are left untouched.
//
// opcode, value, rd, inflags, outflags - stored in the fields of the same name
// returns - the newly allocated armlet with leader set to FALSE; the rx and
//           ry fields are not assigned here
Armlet* CProfiler::getEmit(uint8 opcode, uint32 value, uint8 rd, uint8 inflags, uint8 outflags)
{
	Armlet* armlet = new Armlet;

	armlet->leader = FALSE;
	armlet->outflags = outflags;
	armlet->inflags = inflags;
	armlet->rd = rd;
	armlet->value = value;
	armlet->opcode = opcode;

	return armlet;
}

// Returns the condition code that is the logical opposite of the given one.
//
// conditionCode - one of the cc* condition codes
// returns       - the opposite code; ccNV (after tracing an error) when the
//                 code is not recognised
uint8 CProfiler::invertConditionCode(uint8 conditionCode)
{
	// each row holds a condition and its opposite
	static const uint8 oppositePairs[8][2] =
	{
		{ ccEQ, ccNE },
		{ ccCS, ccCC },
		{ ccMI, ccPL },
		{ ccVS, ccVC },
		{ ccHI, ccLS },
		{ ccGE, ccLT },
		{ ccGT, ccLE },
		{ ccAL, ccNV }
	};

	for(int pair = 0; pair < 8; pair++)
	{
		if(conditionCode == oppositePairs[pair][0])
			return oppositePairs[pair][1];

		if(conditionCode == oppositePairs[pair][1])
			return oppositePairs[pair][0];
	}

	// not one of the sixteen known codes
	TRACE("ERROR, invertConditionCode() switch has fallen through, conditioncode=0x%x \n", conditionCode);
	return ccNV;
}

//
// emit code to be done before the end of every basic block to check for interrupts
//

// Emits the interrupt check sequence:
//     INTCHECK result, basicBlockCount
//     MOVC     zero, 0
//     CMP      result, zero
//     GOTOEQ   past the leave
//     [MOVC    vPC, address + 8]     only when updatePC is TRUE
//     LEAVE    leaveIntCheck
// i.e. 5 armlets, or 6 when updatePC is TRUE. Two temps are consumed and
// basicBlockCount is reset to 0 afterwards.
//
// address  - address of the instruction being translated
// updatePC - TRUE when vPC still has to be loaded before leaving; FALSE when
//            the instruction's own code may already have updated PC
void CProfiler::emitInterruptCheck(uint32 address, BOOL updatePC)
{
	// result of the check, compared against zero below
	const uint8 checkResult = useNextTemp();
	emit1(irINTCHECK, checkResult, basicBlockCount, fNONE, fNONE);

	const uint8 zeroVar = useNextTemp();
	emit1(irMOVC, zeroVar, 0, fNONE, fNONE);
	emit2(irCMP, checkResult, zeroVar, fNONE, fNONE);

	// no interrupt -> skip the leave; the goto has one more armlet to jump
	// over when the PC update is emitted as well
	const int skipDistance = updatePC ? 4 : 3;
	emitV(irGOTOEQ, armletCounter + skipDistance, fNONE, fNONE);

	if(updatePC)
		emit1(irMOVC, vPC, address + 8, fNONE, fNONE);

	emitV(irLEAVE, leaveIntCheck, fNONE, fNONE);

	basicBlockCount = 0;
}

// Passes the given code cache on to the optimiser; the profiler does not
// keep a pointer to it itself.
//
// aCodeCache - code cache handed to COptimiser::setCodeCache()
void CProfiler::setCodeCache(CCodeCache *aCodeCache)
{
	this->optimiser->setCodeCache(aCodeCache);
}
