arch/tile/lib/atomic_asm_32.S

   1 /*
   2  * Copyright 2010 Tilera Corporation. All Rights Reserved.
   3  *
   4  *   This program is free software; you can redistribute it and/or
   5  *   modify it under the terms of the GNU General Public License
   6  *   as published by the Free Software Foundation, version 2.
   7  *
   8  *   This program is distributed in the hope that it will be useful, but
   9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
  10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11  *   NON INFRINGEMENT.  See the GNU General Public License for
  12  *   more details.
  13  *
  14  * Support routines for atomic operations.  Each function takes:
  15  *
  16  * r0: address to manipulate
  17  * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
  18  * r2: new value to write, or for cmpxchg/add_unless, value to compare against
  19  * r3: (cmpxchg/xchg_add_unless) new value to write or add;
  20  *     (atomic64 ops) high word of the 64-bit value whose low word is in r2
  21  * r4/r5: (cmpxchg64/add_unless64) new value to write or add
  22  *
  23  * The 32-bit routines return a "struct __get_user" so that the futex code
  24  * has an opportunity to return -EFAULT to the user if needed.
  25  * The 64-bit routines just return a "long long" with the value,
  26  * since they are only used from kernel space and don't expect to fault.
  27  * Support for 16-bit ops is included in the framework but we don't provide any.
  28  *
  29  * Note that the caller is advised to issue a suitable L1 or L2
  30  * prefetch on the address being manipulated to avoid extra stalls.
  31  * In addition, the hot path is on two icache lines, and we start with
  32  * a jump to the second line to make sure they are both in cache so
  33  * that we never stall waiting on icache fill while holding the lock.
  34  * (This doesn't work out for most 64-bit ops, since they consume
  35  * too many bundles, so they may take an extra i-cache stall.)
  36  *
  37  * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
  38  * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
  39  * the code, just page faults.
  40  *
  41  * If the load or store faults in a way that can be directly fixed in
  42  * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
  43  * directly, return to the instruction that faulted, and retry it.
  44  *
  45  * If the load or store faults in a way that potentially requires us
  46  * to release the atomic lock, then retry (e.g. a migrating PTE), we
  47  * reset the PC in do_page_fault_ics() to the "tns" instruction so
  48  * that on return we will reacquire the lock and restart the op.  We
  49  * are somewhat overloading the exception_table_entry notion by doing
  50  * this, since those entries are not normally used for migrating PTEs.
  51  *
  52  * If the main page fault handler discovers a bad address, it will see
  53  * the PC pointing to the "tns" instruction (due to the earlier
  54  * exception_table_entry processing in do_page_fault_ics), and
  55  * re-reset the PC to the fault handler, atomic_bad_address(), which
  56  * effectively takes over from the atomic op and can either return a
  57  * bad "struct __get_user" (for user addresses) or can just panic (for
  58  * bad kernel addresses).
  59  *
  60  * Note that if the value we would store is the same as what we
  61  * loaded, we bypass the store.  Other platforms with true atomics can
  62  * make the guarantee that a non-atomic __clear_bit(), for example,
  63  * can safely race with an atomic test_and_set_bit(); this example is
  64  * from bit_spinlock.h in slub_lock() / slub_unlock().  We can't do
  65  * that on Tile since the "atomic" op is really just a
  66  * read/modify/write, and can race with the non-atomic
  67  * read/modify/write.  However, if we can short-circuit the write when
  68  * it is not needed, in the atomic case, we avoid the race.
  69  */
  70
  71 #include <linux/linkage.h>
  72 #include <asm/atomic_32.h>
  73 #include <asm/page.h>
  74 #include <asm/processor.h>
  75
  76         .section .text.atomic,"ax" /* everything below is emitted into .text.atomic */
  77 ENTRY(__start_atomic_asm_code) /* start-of-region label; its counterpart __end_atomic_asm_code closes the file */
  78
  79         .macro  atomic_op, name, bitwidth, body /* emits __atomic<name>: take the lock, load old value, run <body>, store if changed, unlock, return old value */
  80         .align  64 /* every generated routine starts on a 64-byte boundary */
  81 STD_ENTRY_SECTION(__atomic\name, .text.atomic)
  82         {
  83          movei  r24, 1 /* r24 = 1: the value written to INTERRUPT_CRITICAL_SECTION at 4: */
  84          j      4f              /* branch to second cache line */
  85         }
  86 1:      { /* 1: load the old value; for 32-bit ops this address is listed in __ex_table */
  87          .ifc \bitwidth,16
  88          lh     r22, r0 /* r22 = old halfword at r0 */
  89          .else
  90          lw     r22, r0 /* r22 = old word at r0 (low word for 64-bit ops) */
  91          addi   r28, r0, 4 /* r28 = r0 + 4, the high-word address used by 64-bit ops */
  92          .endif
  93         }
  94         .ifc \bitwidth,64
  95         lw      r23, r28 /* r23 = old high word */
  96         .endif
  97         \body /* set r24, and r25 if 64-bit */
  98         {
  99          seq    r26, r22, r24 /* r26 = (new low == old low) */
 100          seq    r27, r23, r25 /* r27 = (new high == old high); only tested for 64-bit ops */
 101         }
 102         .ifc \bitwidth,64
 103         bbnst   r27, 2f /* high word differs: go store */
 104         .endif
 105         bbs     r26, 3f         /* skip write-back if it's the same value */
 106 2:      { /* 2: write-back; for 32-bit ops this address is listed in __ex_table */
 107          .ifc \bitwidth,16
 108          sh     r0, r24
 109          .else
 110          sw     r0, r24 /* store new (low) word */
 111          .endif
 112         }
 113         .ifc \bitwidth,64
 114         sw      r28, r25 /* store new high word */
 115         .endif
 116         mf /* memory fence ahead of the lock release below */
 117 3:      { /* 3: common exit, also the target of early-out branches in <body> */
 118          move   r0, r22 /* return old (low) value in r0 */
 119          .ifc \bitwidth,64
 120          move   r1, r23 /* return old high word in r1 */
 121          .else
 122          move   r1, zero /* second return word is 0 (presumably the error field of struct __get_user; confirm) */
 123          .endif
 124          sw     ATOMIC_LOCK_REG_NAME, zero /* write 0 to the lock word to release it */
 125         }
 126         mtspr   INTERRUPT_CRITICAL_SECTION, zero /* leave the interrupt critical section */
 127         jrp     lr
 128 4:      { /* 4: entry continuation, reached from the first bundle */
 129          move   ATOMIC_LOCK_REG_NAME, r1 /* copy the lock pointer argument into the lock register */
 130          mtspr  INTERRUPT_CRITICAL_SECTION, r24 /* r24 == 1 here: enter the critical section */
 131         }
 132 #ifndef CONFIG_SMP
 133         j       1b              /* no atomic locks */
 134 #else
 135         {
 136          tns    r21, ATOMIC_LOCK_REG_NAME /* first attempt on the lock; r21 == 0 means we got it */
 137          moveli r23, 2048       /* maximum backoff time in cycles */
 138         }
 139         {
 140          bzt    r21, 1b         /* branch if lock acquired */
 141          moveli r25, 32         /* starting backoff time in cycles */
 142         }
 143 5:      mtspr   INTERRUPT_CRITICAL_SECTION, zero /* clear the critical-section bit while spinning */
 144         mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
 145 6:      mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
 146         sub     r22, r22, r26 /* r22 = cycles elapsed since the start point */
 147         slt     r22, r22, r25 /* r22 = (elapsed < current backoff) */
 148         bbst    r22, 6b /* keep spinning until the backoff has elapsed */
 149         {
 150          mtspr  INTERRUPT_CRITICAL_SECTION, r24 /* set the critical-section bit again before retrying */
 151          shli   r25, r25, 1     /* double the backoff; retry the tns */
 152         }
 153         {
 154          tns    r21, ATOMIC_LOCK_REG_NAME
 155          slt    r26, r23, r25   /* is the proposed backoff too big? */
 156         }
 157         {
 158          bzt    r21, 1b         /* branch if lock acquired */
 159          mvnz   r25, r26, r23 /* if too big (r26 != 0), clamp the backoff to the maximum in r23 */
 160         }
 161         j       5b /* lock still held elsewhere: back off again */
 162 #endif
 163         STD_ENDPROC(__atomic\name)
 164         .ifc \bitwidth,32
 165         .pushsection __ex_table,"a" /* fault entries are emitted only for the 32-bit routines */
 166         .align  4
 167         .word   1b, __atomic\name /* fault at the load (1:) -> restart at the routine entry */
 168         .word   2b, __atomic\name /* fault at the store (2:) -> restart at the routine entry */
 169         .word   __atomic\name, __atomic_bad_address /* routine entry itself maps to __atomic_bad_address */
 170         .popsection
 171         .endif
 172         .endm
 173
 174
 175 /*
 176  * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
 177  */
 178
 179 atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" /* if old == r2 store r3, otherwise exit via 3: without storing */
 180 atomic_op 32_xchg, 32, "move r24, r2" /* new = r2 */
 181 atomic_op 32_xchg_add, 32, "add r24, r22, r2" /* new = old + r2 */
 182 atomic_op 32_xchg_add_unless, 32, \
 183         "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" /* new = old + r3, unless old == r2 (then exit via 3:) */
 184 atomic_op 32_fetch_or, 32, "or r24, r22, r2" /* new = old | r2 */
 185 atomic_op 32_fetch_and, 32, "and r24, r22, r2" /* new = old & r2 */
 186 atomic_op 32_fetch_andn, 32, "nor r24, r2, zero; and r24, r22, r24" /* new = old & ~r2; ~r2 is built in scratch r24 so r2 survives a fault restart from 2: */
 187 atomic_op 32_fetch_xor, 32, "xor r24, r22, r2" /* new = old ^ r2 */
 188
 189 atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
 190         { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" /* store r5:r4 only if both halves of old equal r3:r2 */
 191 atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }" /* new = r3:r2 */
 192 atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
 193         slt_u r26, r24, r22; add r25, r25, r26" /* new = old + r3:r2; r26 = carry out of the low word (sum < old low) */
 194 atomic_op 64_xchg_add_unless, 64, \
 195         "{ sne r26, r22, r2; sne r27, r23, r3 }; \
 196         { or r26, r26, r27; add r24, r22, r4 }; \
 197         { bbns r26, 3f; add r25, r23, r5 }; \
 198         slt_u r26, r24, r22; add r25, r25, r26" /* new = old + r5:r4 unless old == r3:r2; the add is skipped only when BOTH halves match (r26|r27 == 0); low-word carry goes into r25 */
 199 atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" /* new = old | r3:r2, applied per 32-bit half */
 200 atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" /* new = old & r3:r2, applied per 32-bit half */
 201 atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" /* new = old ^ r3:r2, applied per 32-bit half */
 202
 203         jrp     lr              /* happy backtracer */
 204
 205 ENTRY(__end_atomic_asm_code) /* end-of-region label matching __start_atomic_asm_code */