assembler: Fix debug output
[b43-tools.git] / assembler / main.c
index bda23d078030810add7ad8161bdbd10c3eb69c75..8abe03c8fbbe51ac3371b0c9990ce313f6aa6dd3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *   Copyright (C) 2006-2007  Michael Buesch <mb@bu3sch.de>
+ *   Copyright (C) 2006-2010  Michael Buesch <mb@bu3sch.de>
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License version 2
@@ -49,6 +49,11 @@ struct code_output {
                OUT_LABEL,
        } type;
 
+       /* Set to true, if this is a jump instruction.
+        * This is only used when assembling RET to check
+        * whether the previous instruction was a jump or not. */
+       bool is_jump_insn;
+
        unsigned int opcode;
        struct out_operand operands[3];
 
@@ -64,7 +69,9 @@ struct code_output {
 };
 
 struct assembler_context {
-       int arch;
+       /* The architecture version (802.11 core revision) */
+       unsigned int arch;
+
        struct label *start_label;
 
        /* Tracking stuff */
@@ -110,6 +117,7 @@ static void eval_directives(struct assembler_context *ctx)
        struct label *l;
        int have_start_label = 0;
        int have_arch = 0;
+       unsigned int arch_fallback = 0;
 
        for_each_statement(ctx, s) {
                if (s->type == STMT_ASMDIR) {
@@ -119,6 +127,21 @@ static void eval_directives(struct assembler_context *ctx)
                                if (have_arch)
                                        asm_error(ctx, "Multiple %%arch definitions");
                                ctx->arch = ad->u.arch;
+                               if (ctx->arch > 5 && ctx->arch < 15)
+                                       arch_fallback = 5;
+                               if (ctx->arch > 15)
+                                       arch_fallback = 15;
+                               if (arch_fallback) {
+                                       asm_warn(ctx, "Using %%arch %d is incorrect. "
+                                                "The wireless core revision %d uses the "
+                                                "firmware architecture %d. So use %%arch %d",
+                                                ctx->arch, ctx->arch, arch_fallback, arch_fallback);
+                                       ctx->arch = arch_fallback;
+                               }
+                               if (ctx->arch != 5 && ctx->arch != 15) {
+                                       asm_error(ctx, "Architecture version %u unsupported",
+                                                 ctx->arch);
+                               }
                                have_arch = 1;
                                break;
                        case ADIR_START:
@@ -135,13 +158,11 @@ static void eval_directives(struct assembler_context *ctx)
 
        if (!have_arch)
                asm_error(ctx, "No %%arch defined");
-       if (ctx->arch != NEWWORLD)
-               asm_error(ctx, "TODO: Only NEWWORLD arch supported, yet");
        if (!have_start_label)
                asm_info(ctx, "Using start address 0");
 }
 
-static int is_possible_imm(unsigned int imm)
+static bool is_possible_imm(unsigned int imm)
 {
        unsigned int mask;
 
@@ -160,29 +181,52 @@ static int is_possible_imm(unsigned int imm)
        return 1;
 }
 
-static int is_valid_imm(unsigned int imm)
+static unsigned int immediate_nr_bits(struct assembler_context *ctx)
+{
+       switch (ctx->arch) {
+       case 5:
+               return 10; /* 10 bits */
+       case 15:
+               return 11; /* 11 bits */
+       }
+       asm_error(ctx, "Internal error: immediate_nr_bits unknown arch\n");
+}
+
+static bool is_valid_imm(struct assembler_context *ctx,
+                        unsigned int imm)
 {
        unsigned int mask;
+       unsigned int immediate_size;
 
        /* This function checks if the immediate value is representable
         * as a native immediate operand.
         *
-        * The value itself is 10bit long, signed.
-        * We also honor sign-extension, so we allow values
-        * of 0xFFFF, for example.
+        * For v5 architecture the immediate can be 10bit long.
+        * For v15 architecture the immediate can be 11bit long.
+        *
+        * The value is sign-extended, so we allow values
+        * of 0xFFFA, for example.
         */
 
        if (!is_possible_imm(imm))
                return 0;
        imm &= 0xFFFF;
 
-       /* assert sign extension */
-       mask = 0xFC00;
-       if (imm & (1 << 9)) {
-               /* sign-extended */
+       immediate_size = immediate_nr_bits(ctx);
+
+       /* First create a mask with all possible bits for
+        * an immediate value unset. */
+       mask = (~0 << immediate_size) & 0xFFFF;
+       /* Is the sign bit of the immediate set? */
+       if (imm & (1 << (immediate_size - 1))) {
+               /* Yes, so all bits above that must also
+                * be set, otherwise we can't represent this
+                * value in an operand. */
                if ((imm & mask) != mask)
                        return 0;
        } else {
+               /* All bits above the immediate's size must
+                * be unset. */
                if (imm & mask)
                        return 0;
        }
@@ -190,33 +234,33 @@ static int is_valid_imm(unsigned int imm)
        return 1;
 }
 
-static int is_contiguous_bitmask(unsigned int mask)
+/* This checks if the value is nonzero and a power of two. */
+static bool is_power_of_two(unsigned int value)
 {
-       int bit;
-       int only_zero_now = 0;
-
-       /* This checks if the mask is contiguous.
-        * A contiguous mask is:
-        *   0b0001111110000
-        * A non-contiguous mask is:
-        *   0b0001101110000
-        */
+       return (value && ((value & (value - 1)) == 0));
+}
 
-       bit = ffs(mask);
-       if (!bit)
-               return 1;
-       if (bit > 16)
-               return 1;
-       bit--;
-       for ( ; bit < 16; bit++) {
-               if (mask & (1 << bit)) {
-                       if (only_zero_now)
-                               return 0;
-               } else
-                       only_zero_now = 1;
-       }
+/* This checks if all bits set in the mask are contiguous.
+ * Zero is also considered a contiguous mask. */
+static bool is_contiguous_bitmask(unsigned int mask)
+{
+       unsigned int low_zeros_mask;
+       bool is_contiguous;
 
-       return 1;
+       if (mask == 0)
+               return 1;
+       /* Turn the lowest zeros of the mask into a bitmask.
+        * Example:  0b00011000 -> 0b00000111 */
+       low_zeros_mask = (mask - 1) & ~mask;
+       /* Adding the low_zeros_mask to the original mask
+        * basically is a bitwise OR operation.
+        * If the original mask was contiguous, we end up with a
+        * contiguous bitmask from bit 0 to the highest bit
+        * set in the original mask. Adding 1 will result in a single
+        * bit set, which is a power of two. */
+       is_contiguous = is_power_of_two(mask + low_zeros_mask + 1);
+
+       return is_contiguous;
 }
 
 static unsigned int generate_imm_operand(struct assembler_context *ctx,
@@ -224,21 +268,23 @@ static unsigned int generate_imm_operand(struct assembler_context *ctx,
 {
        unsigned int val, tmp;
        unsigned int mask;
-       int too_long = 0;
-
-       /* format: 0b11ii iiii iiii */
 
        val = 0xC00;
+       if (ctx->arch == 15)
+               val <<= 1;
        tmp = imm->imm;
 
-       if (!is_valid_imm(tmp)) {
+       if (!is_valid_imm(ctx, tmp)) {
                asm_warn(ctx, "IMMEDIATE 0x%X (%d) too long "
-                             "(> 9 bits + sign). Did you intend to "
+                             "(> %u bits + sign). Did you intend to "
                              "use implicit sign extension?",
-                        tmp, (int)tmp);
+                        tmp, (int)tmp, immediate_nr_bits(ctx) - 1);
        }
 
-       tmp &= 0x3FF;
+       if (ctx->arch == 15)
+               tmp &= 0x7FF;
+       else
+               tmp &= 0x3FF;
        val |= tmp;
 
        return val;
@@ -251,22 +297,25 @@ static unsigned int generate_reg_operand(struct assembler_context *ctx,
 
        switch (reg->type) {
        case GPR:
-               /* format: 0b1011 11rr rrrr */
                val |= 0xBC0;
-               if (reg->nr & ~0x3F)
+               if (ctx->arch == 15)
+                       val <<= 1;
+               if (reg->nr & ~0x3F) /* REVISIT: 128 regs for v15 arch possible? Probably not... */
                        asm_error(ctx, "GPR-nr too big");
                val |= reg->nr;
                break;
        case SPR:
-               /* format: 0b100. .... .... */
                val |= 0x800;
+               if (ctx->arch == 15)
+                       val <<= 1;
                if (reg->nr & ~0x1FF)
                        asm_error(ctx, "SPR-nr too big");
                val |= reg->nr;
                break;
        case OFFR:
-               /* format: 0b1000 0110 0rrr */
                val |= 0x860;
+               if (ctx->arch == 15)
+                       val <<= 1;
                if (reg->nr & ~0x7)
                        asm_error(ctx, "OFFR-nr too big");
                val |= reg->nr;
@@ -281,31 +330,62 @@ static unsigned int generate_reg_operand(struct assembler_context *ctx,
 static unsigned int generate_mem_operand(struct assembler_context *ctx,
                                         const struct memory *mem)
 {
-       unsigned int val = 0, off, reg;
+       unsigned int val = 0, off, reg, off_mask, reg_shift;
 
        switch (mem->type) {
        case MEM_DIRECT:
-               /* format: 0b0mmm mmmm mmmm */
                off = mem->offset;
-               if (off & ~0x7FF) {
-                       asm_warn(ctx, "DIRECT memoffset 0x%X too long (> 11 bits)", off);
-                       off &= 0x7FF;
+               switch (ctx->arch) {
+               case 5:
+                       if (off & ~0x7FF) {
+                               asm_warn(ctx, "DIRECT memoffset 0x%X too long (> 11 bits)", off);
+                               off &= 0x7FF;
+                       }
+                       break;
+               case 15:
+                       if (off & ~0xFFF) {
+                               asm_warn(ctx, "DIRECT memoffset 0x%X too long (> 12 bits)", off);
+                               off &= 0xFFF;
+                       }
+                       break;
+               default:
+                       asm_error(ctx, "Internal error: generate_mem_operand invalid arch");
                }
                val |= off;
                break;
        case MEM_INDIRECT:
-               /* format: 0b101r rroo oooo */
+               switch (ctx->arch) {
+               case 5:
+                       val = 0xA00;
+                       off_mask = 0x3F;
+                       reg_shift = 6;
+                       break;
+               case 15:
+                       val = 0x1400;
+                       off_mask = 0x7F;
+                       reg_shift = 7;
+                       break;
+               default:
+                       asm_error(ctx, "Internal error: MEM_INDIRECT invalid arch\n");
+               }
+
                off = mem->offset;
                reg = mem->offr_nr;
-               val |= 0xA00;
-               if (off & ~0x3F) {
-                       asm_warn(ctx, "INDIRECT memoffset 0x%X too long (> 6 bits)", off);
-                       off &= 0x3F;
+               if (off & ~off_mask) {
+                       asm_warn(ctx, "INDIRECT memoffset 0x%X too long (> %u bits)",
+                                off, reg_shift);
+                       off &= off_mask;
                }
-               if (reg & ~0x7)
+               if (reg > 6) {
+                       /* Assembler bug. The parser shouldn't pass this value. */
                        asm_error(ctx, "OFFR-nr too big");
+               }
+               if (reg == 6) {
+                       asm_warn(ctx, "Using offset register 6. This register is broken "
+                                "on certain devices. Use off0 to off5 only.");
+               }
                val |= off;
-               val |= (reg << 6);
+               val |= (reg << reg_shift);
                break;
        default:
                asm_error(ctx, "generate_mem_operand() memtype");
@@ -345,9 +425,9 @@ static void generate_operand(struct assembler_context *ctx,
        }
 }
 
-static void do_assemble_insn(struct assembler_context *ctx,
-                            struct instruction *insn,
-                            unsigned int opcode)
+static struct code_output * do_assemble_insn(struct assembler_context *ctx,
+                                            struct instruction *insn,
+                                            unsigned int opcode)
 {
        int i;
        struct operlist *ol;
@@ -398,6 +478,29 @@ static void do_assemble_insn(struct assembler_context *ctx,
                               "lowlevel do_assemble_insn");
 
        list_add_tail(&out->list, &ctx->output);
+
+       return out;
+}
+
+static void do_assemble_ret(struct assembler_context *ctx,
+                           struct instruction *insn,
+                           unsigned int opcode)
+{
+       struct code_output *out;
+
+       /* Get the previous instruction and check whether it
+        * is a jump instruction. */
+       list_for_each_entry_reverse(out, &ctx->output, list) {
+               /* Search the last insn. */
+               if (out->type == OUT_INSN) {
+                       if (out->is_jump_insn) {
+                               asm_warn(ctx, "RET instruction directly after "
+                                        "jump instruction. The hardware won't like this.");
+                       }
+                       break;
+               }
+       }
+       do_assemble_insn(ctx, insn, opcode);
 }
 
 static unsigned int merge_ext_into_opcode(struct assembler_context *ctx,
@@ -429,19 +532,34 @@ static unsigned int merge_external_jmp_into_opcode(struct assembler_context *ctx
                                                   unsigned int opbase,
                                                   struct instruction *insn)
 {
+       struct operand *fake;
+       struct registr *fake_reg;
+       struct operand *target;
        struct operlist *ol;
        unsigned int cond;
        unsigned int opcode;
 
        ol = insn->operands;
        opcode = opbase;
-       cond = ol->oper[0]->u.raw;
+       cond = ol->oper[0]->u.imm->imm;
        if (cond & ~0xFF)
                asm_error(ctx, "External jump condition value too big (> 0xFF)");
        opcode |= cond;
-       ol->oper[0] = ol->oper[1];
-       ol->oper[1] = ol->oper[2];
-       ol->oper[2] = ol->oper[3];
+       target = ol->oper[1];
+       memset(ol->oper, 0, sizeof(ol->oper));
+
+       /* This instruction has two fake r0 operands
+        * at position 0 and 1. */
+       fake = xmalloc(sizeof(*fake));
+       fake_reg = xmalloc(sizeof(*fake_reg));
+       fake->type = OPER_REG;
+       fake->u.reg = fake_reg;
+       fake_reg->type = GPR;
+       fake_reg->nr = 0;
+
+       ol->oper[0] = fake;
+       ol->oper[1] = fake;
+       ol->oper[2] = target;
 
        return opcode;
 }
@@ -481,7 +599,7 @@ static void emulate_mov_insn(struct assembler_context *ctx,
                tmp = in->u.imm->imm;
                if (!is_possible_imm(tmp))
                        asm_error(ctx, "MOV operand 0x%X > 16bit", tmp);
-               if (!is_valid_imm(tmp)) {
+               if (!is_valid_imm(ctx, tmp)) {
                        /* Immediate too big for plain OR */
                        em_insn.op = OP_ORX;
 
@@ -515,19 +633,20 @@ static void emulate_jmp_insn(struct assembler_context *ctx,
 {
        struct instruction em_insn;
        struct operlist em_ol;
-       struct operand em_op;
-       struct immediate em_imm;
-
-       /* This is a pseudo-OP. We emulate it by JE */
-
-       em_insn.op = OP_JE;
-       em_imm.imm = 1;
-       em_op.type = OPER_IMM;
-       em_op.u.imm = &em_imm;
-       em_ol.oper[0] = &em_op;
-       em_ol.oper[1] = &em_op;
-       em_ol.oper[2] = insn->operands->oper[0];
+       struct immediate em_condition;
+       struct operand em_cond_op;
+
+       /* This is a pseudo-OP. We emulate it with
+        * JEXT 0x7F, target */
+
+       em_insn.op = OP_JEXT;
+       em_condition.imm = 0x7F; /* Ext cond: Always true */
+       em_cond_op.type = OPER_IMM;
+       em_cond_op.u.imm = &em_condition;
+       em_ol.oper[0] = &em_cond_op;
+       em_ol.oper[1] = insn->operands->oper[0]; /* Target */
        em_insn.operands = &em_ol;
+
        assemble_instruction(ctx, &em_insn); /* recurse */
 }
 
@@ -535,6 +654,7 @@ static void emulate_jand_insn(struct assembler_context *ctx,
                              struct instruction *insn,
                              int inverted)
 {
+       struct code_output *out;
        struct instruction em_insn;
        struct operlist em_ol;
        struct operand em_op_shift;
@@ -563,7 +683,7 @@ static void emulate_jand_insn(struct assembler_context *ctx,
                 * Check if it's representable by a normal JAND insn.
                 */
                tmp = imm_oper->u.imm->imm;
-               if (!is_valid_imm(tmp)) {
+               if (!is_valid_imm(ctx, tmp)) {
                        /* Nope, this must be emulated by JZX/JNZX */
                        if (!is_contiguous_bitmask(tmp)) {
                                asm_error(ctx, "Long bitmask 0x%X is not contiguous",
@@ -604,14 +724,16 @@ static void emulate_jand_insn(struct assembler_context *ctx,
 
        /* Do a normal JAND/JNAND instruction */
        if (inverted)
-               do_assemble_insn(ctx, insn, 0x040 | 0x1);
+               out = do_assemble_insn(ctx, insn, 0x040 | 0x1);
        else
-               do_assemble_insn(ctx, insn, 0x040);
+               out = do_assemble_insn(ctx, insn, 0x040);
+       out->is_jump_insn = 1;
 }
 
 static void assemble_instruction(struct assembler_context *ctx,
                                 struct instruction *insn)
 {
+       struct code_output *out;
        unsigned int opcode;
 
        switch (insn->op) {
@@ -687,62 +809,91 @@ static void assemble_instruction(struct assembler_context *ctx,
                emulate_jand_insn(ctx, insn, 1);
                return;
        case OP_JS:
-               do_assemble_insn(ctx, insn, 0x050);
+               out = do_assemble_insn(ctx, insn, 0x050);
+               out->is_jump_insn = 1;
                break;
        case OP_JNS:
-               do_assemble_insn(ctx, insn, 0x050 | 0x1);
+               out = do_assemble_insn(ctx, insn, 0x050 | 0x1);
+               out->is_jump_insn = 1;
                break;
        case OP_JE:
-               do_assemble_insn(ctx, insn, 0x0D0);
+               out = do_assemble_insn(ctx, insn, 0x0D0);
+               out->is_jump_insn = 1;
                break;
        case OP_JNE:
-               do_assemble_insn(ctx, insn, 0x0D0 | 0x1);
+               out = do_assemble_insn(ctx, insn, 0x0D0 | 0x1);
+               out->is_jump_insn = 1;
                break;
        case OP_JLS:
-               do_assemble_insn(ctx, insn, 0x0D2);
+               out = do_assemble_insn(ctx, insn, 0x0D2);
+               out->is_jump_insn = 1;
                break;
        case OP_JGES:
-               do_assemble_insn(ctx, insn, 0x0D2 | 0x1);
+               out = do_assemble_insn(ctx, insn, 0x0D2 | 0x1);
+               out->is_jump_insn = 1;
                break;
        case OP_JGS:
-               do_assemble_insn(ctx, insn, 0x0D4);
+               out = do_assemble_insn(ctx, insn, 0x0D4);
+               out->is_jump_insn = 1;
                break;
        case OP_JLES:
-               do_assemble_insn(ctx, insn, 0x0D4 | 0x1);
+               out = do_assemble_insn(ctx, insn, 0x0D4 | 0x1);
+               out->is_jump_insn = 1;
                break;
        case OP_JL:
-               do_assemble_insn(ctx, insn, 0x0DA);
+               out = do_assemble_insn(ctx, insn, 0x0DA);
+               out->is_jump_insn = 1;
                break;
        case OP_JGE:
-               do_assemble_insn(ctx, insn, 0x0DA | 0x1);
+               out = do_assemble_insn(ctx, insn, 0x0DA | 0x1);
+               out->is_jump_insn = 1;
                break;
        case OP_JG:
-               do_assemble_insn(ctx, insn, 0x0DC);
+               out = do_assemble_insn(ctx, insn, 0x0DC);
                break;
        case OP_JLE:
-               do_assemble_insn(ctx, insn, 0x0DC | 0x1);
+               out = do_assemble_insn(ctx, insn, 0x0DC | 0x1);
+               out->is_jump_insn = 1;
                break;
        case OP_JZX:
                opcode = merge_ext_into_opcode(ctx, 0x400, insn);
-               do_assemble_insn(ctx, insn, opcode);
+               out = do_assemble_insn(ctx, insn, opcode);
+               out->is_jump_insn = 1;
                break;
        case OP_JNZX:
                opcode = merge_ext_into_opcode(ctx, 0x500, insn);
-               do_assemble_insn(ctx, insn, opcode);
+               out = do_assemble_insn(ctx, insn, opcode);
+               out->is_jump_insn = 1;
                break;
        case OP_JEXT:
                opcode = merge_external_jmp_into_opcode(ctx, 0x700, insn);
-               do_assemble_insn(ctx, insn, opcode);
+               out = do_assemble_insn(ctx, insn, opcode);
+               out->is_jump_insn = 1;
                break;
        case OP_JNEXT:
                opcode = merge_external_jmp_into_opcode(ctx, 0x600, insn);
-               do_assemble_insn(ctx, insn, opcode);
+               out = do_assemble_insn(ctx, insn, opcode);
+               out->is_jump_insn = 1;
                break;
        case OP_CALL:
+               if (ctx->arch != 5)
+                       asm_error(ctx, "'call' instruction is only supported on arch 5");
                do_assemble_insn(ctx, insn, 0x002);
                break;
+       case OP_CALLS:
+               if (ctx->arch != 15)
+                       asm_error(ctx, "'calls' instruction is only supported on arch 15");
+               do_assemble_insn(ctx, insn, 0x004);
+               break;
        case OP_RET:
-               do_assemble_insn(ctx, insn, 0x003);
+               if (ctx->arch != 5)
+                       asm_error(ctx, "'ret' instruction is only supported on arch 5");
+               do_assemble_ret(ctx, insn, 0x003);
+               break;
+       case OP_RETS:
+               if (ctx->arch != 15)
+                       asm_error(ctx, "'rets' instruction is only supported on arch 15");
+               do_assemble_insn(ctx, insn, 0x005);
                break;
        case OP_TKIPH:
        case OP_TKIPHS:
@@ -913,8 +1064,16 @@ recalculate_addresses:
                                if (addr < 0)
                                        goto does_not_exist;
                                c->operands[i].u.operand = addr;
-                               if (i != 2) /* Is not a jump target */
-                                       c->operands[i].u.operand |= 0xC00; /* Make it be an immediate */
+                               if (i != 2) {
+                                       /* Is not a jump target.
+                                        * Make it be an immediate */
+                                       if (ctx->arch == 5)
+                                               c->operands[i].u.operand |= 0xC00;
+                                       else if (ctx->arch == 15)
+                                               c->operands[i].u.operand |= 0xC00 << 1;
+                                       else
+                                               asm_error(ctx, "Internal error: label res imm");
+                               }
                        }
                        break;
                case OUT_LABEL:
@@ -931,25 +1090,21 @@ does_not_exist:
 static void emit_code(struct assembler_context *ctx)
 {
        FILE *fd;
-       char *fn;
-       size_t fn_len;
+       const char *fn;
        struct code_output *c;
        uint64_t code;
        unsigned char outbuf[8];
-       unsigned int insn_count = 0;
+       unsigned int insn_count = 0, insn_count_limit;
        struct fw_header hdr;
 
-       fn_len = strlen(outfile_name) + 20;
-       fn = xmalloc(fn_len);
-       snprintf(fn, fn_len, "%s.ucode", outfile_name);
+       fn = outfile_name;
        fd = fopen(fn, "w+");
        if (!fd) {
                fprintf(stderr, "Could not open microcode output file \"%s\"\n", fn);
-               free(fn);
                exit(1);
        }
        if (IS_VERBOSE_DEBUG)
-               fprintf(stderr, "\nCode:\n");
+               printf("\nCode:\n");
 
        list_for_each_entry(c, &ctx->output, list) {
                switch (c->type) {
@@ -961,52 +1116,94 @@ static void emit_code(struct assembler_context *ctx)
                }
        }
 
-       memset(&hdr, 0, sizeof(hdr));
-       hdr.type = FW_TYPE_UCODE;
-       hdr.ver = FW_HDR_VER;
-       hdr.size = cpu_to_be32(8 * insn_count);
-       if (fwrite(&hdr, sizeof(hdr), 1, fd) != 1) {
-               fprintf(stderr, "Could not write microcode outfile\n");
-               exit(1);
+       switch (cmdargs.outformat) {
+       case FMT_RAW_LE32:
+       case FMT_RAW_BE32:
+               /* Nothing */
+               break;
+       case FMT_B43:
+               memset(&hdr, 0, sizeof(hdr));
+               hdr.type = FW_TYPE_UCODE;
+               hdr.ver = FW_HDR_VER;
+               hdr.size = cpu_to_be32(8 * insn_count);
+               if (fwrite(&hdr, sizeof(hdr), 1, fd) != 1) {
+                       fprintf(stderr, "Could not write microcode outfile\n");
+                       exit(1);
+               }
+               break;
        }
 
-       if (insn_count > NUM_INSN_LIMIT)
-               asm_warn(ctx, "Generating more than %d instructions. This "
+       switch (ctx->arch) {
+       case 5:
+               insn_count_limit = NUM_INSN_LIMIT_R5;
+               break;
+       case 15:
+               insn_count_limit = ~0; //FIXME limit currently unknown.
+               break;
+       default:
+               asm_error(ctx, "Internal error: emit_code unknown arch\n");
+       }
+       if (insn_count > insn_count_limit)
+               asm_warn(ctx, "Generating more than %u instructions. This "
                              "will overflow the device microcode memory.",
-                        NUM_INSN_LIMIT);
+                        insn_count_limit);
 
        list_for_each_entry(c, &ctx->output, list) {
                switch (c->type) {
                case OUT_INSN:
                        if (IS_VERBOSE_DEBUG) {
-                               fprintf(stderr, "%03X %03X,%03X,%03X\n",
+                               printf("%03X %04X,%04X,%04X\n",
                                        c->opcode,
                                        c->operands[0].u.operand,
                                        c->operands[1].u.operand,
                                        c->operands[2].u.operand);
                        }
-                       code = 0;
-
-                       /* Instruction binary format is: xxyy yzzz  0000 oooX
-                        * Big-Endian, X is the most significant part of Xxx.
-                        */
-                       code |= (c->opcode << 4);
-
-                       code |= (((uint64_t)c->operands[0].u.operand & 0xF00) >> 8);
-                       code |= (((uint64_t)c->operands[0].u.operand & 0x0FF) << 56);
 
-                       code |= ((uint64_t)c->operands[1].u.operand << 44);
-
-                       code |= ((uint64_t)c->operands[2].u.operand << 32);
+                       switch (ctx->arch) {
+                       case 5:
+                               code = 0;
+                               code |= ((uint64_t)c->operands[2].u.operand);
+                               code |= ((uint64_t)c->operands[1].u.operand) << 12;
+                               code |= ((uint64_t)c->operands[0].u.operand) << 24;
+                               code |= ((uint64_t)c->opcode) << 36;
+                               break;
+                       case 15:
+                               code = 0;
+                               code |= ((uint64_t)c->operands[2].u.operand);
+                               code |= ((uint64_t)c->operands[1].u.operand) << 13;
+                               code |= ((uint64_t)c->operands[0].u.operand) << 26;
+                               code |= ((uint64_t)c->opcode) << 39;
+                               break;
+                       default:
+                               asm_error(ctx, "No emit format for arch %u",
+                                         ctx->arch);
+                       }
 
-                       outbuf[7] = (code & 0x00000000000000FFULL);
-                       outbuf[6] = (code & 0x000000000000FF00ULL) >> 8;
-                       outbuf[5] = (code & 0x0000000000FF0000ULL) >> 16;
-                       outbuf[4] = (code & 0x00000000FF000000ULL) >> 24;
-                       outbuf[3] = (code & 0x000000FF00000000ULL) >> 32;
-                       outbuf[2] = (code & 0x0000FF0000000000ULL) >> 40;
-                       outbuf[1] = (code & 0x00FF000000000000ULL) >> 48;
-                       outbuf[0] = (code & 0xFF00000000000000ULL) >> 56;
+                       switch (cmdargs.outformat) {
+                       case FMT_B43:
+                       case FMT_RAW_BE32:
+                               code = ((code & (uint64_t)0xFFFFFFFF00000000ULL) >> 32) |
+                                      ((code & (uint64_t)0x00000000FFFFFFFFULL) << 32);
+                               outbuf[0] = (code & (uint64_t)0xFF00000000000000ULL) >> 56;
+                               outbuf[1] = (code & (uint64_t)0x00FF000000000000ULL) >> 48;
+                               outbuf[2] = (code & (uint64_t)0x0000FF0000000000ULL) >> 40;
+                               outbuf[3] = (code & (uint64_t)0x000000FF00000000ULL) >> 32;
+                               outbuf[4] = (code & (uint64_t)0x00000000FF000000ULL) >> 24;
+                               outbuf[5] = (code & (uint64_t)0x0000000000FF0000ULL) >> 16;
+                               outbuf[6] = (code & (uint64_t)0x000000000000FF00ULL) >> 8;
+                               outbuf[7] = (code & (uint64_t)0x00000000000000FFULL) >> 0;
+                               break;
+                       case FMT_RAW_LE32:
+                               outbuf[7] = (code & (uint64_t)0xFF00000000000000ULL) >> 56;
+                               outbuf[6] = (code & (uint64_t)0x00FF000000000000ULL) >> 48;
+                               outbuf[5] = (code & (uint64_t)0x0000FF0000000000ULL) >> 40;
+                               outbuf[4] = (code & (uint64_t)0x000000FF00000000ULL) >> 32;
+                               outbuf[3] = (code & (uint64_t)0x00000000FF000000ULL) >> 24;
+                               outbuf[2] = (code & (uint64_t)0x0000000000FF0000ULL) >> 16;
+                               outbuf[1] = (code & (uint64_t)0x000000000000FF00ULL) >> 8;
+                               outbuf[0] = (code & (uint64_t)0x00000000000000FFULL) >> 0;
+                               break;
+                       }
 
                        if (fwrite(&outbuf, ARRAY_SIZE(outbuf), 1, fd) != 1) {
                                fprintf(stderr, "Could not write microcode outfile\n");
@@ -1017,8 +1214,14 @@ static void emit_code(struct assembler_context *ctx)
                        break;
                }
        }
+
+       if (cmdargs.print_sizes) {
+               printf("%s:  text = %u instructions (%u bytes)\n",
+                      fn, insn_count,
+                      (unsigned int)(insn_count * sizeof(uint64_t)));
+       }
+
        fclose(fd);
-       free(fn);
 }
 
 static void assemble(void)
@@ -1051,8 +1254,12 @@ int main(int argc, char **argv)
        int err, res = 1;
 
        err = parse_args(argc, argv);
-       if (err)
+       if (err < 0)
                goto out;
+       if (err > 0) {
+               res = 0;
+               goto out;
+       }
        err = open_input_file();
        if (err)
                goto out;