Merge commit '86cc97e55fe346502462284d2e636a2b3708163e' as 'Sources/OpenVPN3'

2026-04-24 00:00:05 +08:00 · 2020-02-24 14:43:11 +03:00
parent 133b3756e6 86cc97e55f
commit 32f1555929
655 changed files with 146468 additions and 0 deletions
@@ -0,0 +1,32 @@
+--- aes-armv4.pl.orig	2012-09-03 00:16:20.000000000 -0600
+++ aes-armv4.pl	2012-09-03 00:17:22.000000000 -0600
+@@ -171,7 +170,8 @@
+ 	stmdb   sp!,{r1,r4-r12,lr}
+ 	mov	$rounds,r0		@ inp
+ 	mov	$key,r2
+-	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
+ad1=AES_encrypt-AES_Te
+	sub	$tbl,r3,#ad1	@ Te
+ #if __ARM_ARCH__<7
+ 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
+ 	ldrb	$t1,[$rounds,#2]	@ manner...
+@@ -426,7 +426,8 @@
+ 	bne	.Labrt
+ 
+ .Lok:	stmdb   sp!,{r4-r12,lr}
+-	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
+ad2=_armv4_AES_set_encrypt_key-AES_Te-1024
+	sub	$tbl,r3,#ad2	@ Te4
+ 
+ 	mov	$rounds,r0		@ inp
+ 	mov	lr,r1			@ bits
+@@ -887,7 +888,8 @@
+ 	stmdb   sp!,{r1,r4-r12,lr}
+ 	mov	$rounds,r0		@ inp
+ 	mov	$key,r2
+-	sub	$tbl,r3,#AES_decrypt-AES_Td		@ Td
+ad3=AES_decrypt-AES_Td
+	sub	$tbl,r3,#ad3		@ Td
+ #if __ARM_ARCH__<7
+ 	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
+ 	ldrb	$t1,[$rounds,#2]	@ manner...
@@ -0,0 +1,759 @@
+#!/usr/bin/env python
+#
+# arm-as-to-ios     Modify ARM assembly code for the iOS assembler
+#
+# Copyright (c) 2012 Psellos   http://psellos.com/
+# Licensed under the MIT License:
+#     http://www.opensource.org/licenses/mit-license.php
+#
+# Resources for running OCaml on iOS: http://psellos.com/ocaml/
+#
+import sys
+import re
+
+VERSION = '1.4.0'
+
+initial_glosyms = []
+initial_defsyms = []
+
+# Character classes for expression lexing.
+#
+g_ccid0 = '[$.A-Z_a-z\x80-\xff]'      # Beginning of id
+g_ccid =  '[$.0-9A-Z_a-z\x80-\xff]'   # Later in id
+def ccc(cc):                          # Complement the class
+    if cc[1] == '^':
+        return cc[0] + cc[2:]
+    return cc[0] + '^' + cc[1:]
+def ccce(cc):                         # Complement the class, include EOL
+    return '(?:' + ccc(cc) + '|$)'
+
+# Prefixes for pooled symbol labels and jump table base labels.  They're
+# in the space of Linux assembler local symbols.  Later rules will
+# modify them to the Loc() form.
+#
+g_poolpfx = '.LP'
+g_basepfx = '.LB'
+
+
+def exists(p, l):
+    for l1 in l:
+        if p(l1):
+            return True
+    return False
+
+
+def forall(p, l):
+    for l1 in l:
+        if not p(l1):
+            return False
+    return True
+
+
+def add_prefix(instrs):
+    # Add compatibility macros for all systems, plus hardware
+    # definitions and compatibility macros for iOS.
+    #
+    # All systems:
+    #
+    # Glo()     cpp macro for making global symbols (xxx vs _xxx)
+    # Loc()     cpp macro for making local symbols (.Lxxx vs Lxxx)
+    # .funtype  Expands to .thumb_func for iOS armv7 (null for armv6)
+    #           Expands to .type %function for others
+    #
+    # iOS:
+    #
+    # .machine  armv6/armv7
+    # .thumb    (for armv7)
+    # cbz       Expands to cmp/beq for armv6 (Thumb-only instr)
+    # .type     Not supported by Apple assembler
+    # .size     Not supported by Apple assembler
+    #
+    defre = '#[ \t]*if.*def.*SYS'  # Add new defs near first existing ones
+    skipre = '$|\.syntax[ \t]'     # Skip comment lines (and .syntax)
+
+    for i in range(len(instrs)):
+        if re.match(defre, instrs[i][1]):
+            break
+    else:
+        i = 0
+    for i in range(i, len(instrs)):
+        if not re.match(skipre, instrs[i][1]):
+            break
+    instrs[i:0] = [
+        ('', '', '\n'),
+        ('/* Apple compatibility macros */', '', '\n'),
+        ('', '#if defined(SYS_macosx)', '\n'),
+        ('', '#define Glo(s) _##s', '\n'),
+        ('', '#define Loc(s) L##s', '\n'),
+        ('', '#if defined(MODEL_armv6)', '\n'),
+        ('        ', '.machine  armv6', '\n'),
+        ('        ', '.macro  .funtype', '\n'),
+        ('        ', '.endm', '\n'),
+        ('        ', '.macro  cbz', '\n'),
+        ('        ', 'cmp     $0, #0', '\n'),
+        ('        ', 'beq     $1', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#else', '\n'),
+        ('        ', '.machine  armv7', '\n'),
+        ('', '#if !defined(NO_THUMB)', '\n'),
+        ('        ', '.thumb', '\n'),
+        ('', '#endif', '\n'),
+        ('        ', '.macro  .funtype', '\n'),
+        ('', '#if !defined(NO_THUMB)', '\n'),
+        ('        ', '.thumb_func $0', '\n'),
+        ('', '#endif', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#endif', '\n'),
+        ('        ', '.macro  .type', '\n'),
+        ('        ', '.endm', '\n'),
+        ('        ', '.macro  .size', '\n'),
+        ('        ', '.endm', '\n'),
+        ('        ', '.macro  .skip', '\n'),
+        ('        ', '.space $0', '\n'),
+        ('        ', '.endm', '\n'),
+        ('        ', '.macro  .fpu', '\n'),
+        ('        ', '.endm', '\n'),
+        ('        ', '.macro  .global', '\n'),
+        ('        ', '.globl $0', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#else', '\n'),
+        ('', '#define Glo(s) s', '\n'),
+        ('', '#define Loc(s) .L##s', '\n'),
+        ('        ', '.macro  .funtype symbol', '\n'),
+        ('        ', '.type  \\symbol, %function', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#endif', '\n'),
+        ('/* End Apple compatibility macros */', '', '\n'),
+        ('', '', '\n')
+    ]
+    return instrs
+
+
+# Regular expression for modified ldr lines
+#
+g_ldre = '(ldr[ \t][^,]*,[ \t]*)=(([^ \t\n@,/]|/(?!\*))*)(.*)'
+
+
+def explicit_address_loads(instrs):
+    # Linux assemblers allow the following:
+    #
+    #     ldr rM, =symbol
+    #
+    # which loads rM with [mov] (immediately) if possible, or creates an
+    # entry in memory for the symbol value and loads it PC-relatively
+    # with [ldr].
+    #
+    # The Apple assembler doesn't seem to support this notation.  If the
+    # value is a suitable constant, it emits a valid [mov].  Otherwise
+    # it seems to emit an invalid [ldr] that always generates an error.
+    # (At least I have not been able to make it work).  So, change uses
+    # of =symbol to explicit PC-relative loads.
+    #
+    # This requires a pool containing the addresses to be loaded.  For
+    # now, we just keep track of it ourselves and emit it into the text
+    # segment at the end of the file.
+    #
+    syms = {}
+    result = []
+
+    def repl1((syms, result), (a, b, c)):
+        global g_poolpfx
+        global g_ldre
+        (b1, b2, b3) = parse_iparts(b)
+        mo = re.match(g_ldre, b3, re.DOTALL)
+        if mo:
+            if mo.group(2) not in syms:
+                syms[mo.group(2)] = len(syms)
+            psym = mo.group(2)
+            if psym[0:2] == '.L':
+                psym = psym[2:]
+            newb3 = mo.group(1) + g_poolpfx + psym + mo.group(4)
+            result.append((a, b1 + b2 + newb3, c))
+        else:
+            result.append((a, b, c))
+        return (syms, result)
+
+    def pool1(result, s):
+        global g_poolpfx
+        psym = s
+        if psym[0:2] == '.L':
+            psym = psym[2:]
+        result.append(('', g_poolpfx + psym + ':', '\n'))
+        result.append(('        ', '.long ' + s, '\n'))
+        return result
+
+    reduce(repl1, instrs, (syms, result))
+    if len(syms) > 0:
+        result.append(('', '', '\n'))
+        result.append(('/* Pool of addresses loaded into registers */',
+                        '', '\n'))
+        result.append(('', '', '\n'))
+        result.append(('        ', '.text', '\n'))
+        result.append(('        ', '.align 2', '\n'))
+        reduce(pool1, sorted(syms, key=syms.get), result)
+    return result
+
+
+def global_symbols(instrs):
+    # The form of a global symbol differs between Linux assemblers and
+    # the Apple assember:
+    #
+    # Linux: xxx
+    # Apple: _xxx
+    #
+    # Change occurrences of global symbols to use the Glo() cpp macro
+    # defined in our prefix.
+    #
+    # We consider a symbol to be global if:
+    #
+    # a.  It appears in a .globl declaration; or
+    # b.  It is referenced, has global form, and is not defined
+    #
+    glosyms = set(initial_glosyms)
+    refsyms = set()
+    defsyms = set(initial_defsyms)
+    result = []
+
+    def findglo1 (glosyms, (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line; nothing to do
+            return glosyms
+        (b1, b2, b3) = parse_iparts(b)
+        mo = re.match('(\.globa?l)' + ccce(g_ccid), b3)
+        if mo:
+            tokens = parse_expr(b3[len(mo.group(1)):])
+            if forall(lambda t: token_type(t) in ['space', 'id', ','], tokens):
+                for t in tokens:
+                    if token_type(t) == 'id':
+                        glosyms.add(t)
+        return glosyms
+
+    def findref1 ((refsyms, skipct), (a, b, c)):
+
+        def looksglobal(s):
+            if re.match('(r|a|v|p|c|cr|f|s|d|q|mvax|wcgr)[0-9]+$', s, re.I):
+                return False # numbered registers
+            if re.match('(wr|sb|sl|fp|ip|sp|lr|pc)$', s, re.I):
+                return False # named registers
+            if re.match('(fpsid|fpscr|fpexc|mvfr1|mvfr0)$', s, re.I):
+                return False # more named registers
+            if re.match('(mvf|mvd|mvfx|mvdx|dspsc)$', s, re.I):
+                return False # even more named registers
+            if re.match('(wcid|wcon|wcssf|wcasf|acc)$', s, re.I):
+                return False # even more named registers
+            if re.match('\.$|\.L|[0-9]|#', s):
+                return False # dot, local symbol, or number
+            if re.match('(asl|lsl|lsr|asr|ror|rrx)$', s, re.I):
+                return False # shift names
+            return True
+
+        if re.match('#', b):
+            # Preprocessor line; nothing to do
+            return (refsyms, skipct)
+
+        # Track nesting of .macro/.endm.  For now, we don't look for
+        # global syms in macro defs.  (Avoiding scoping probs etc.)
+        #
+        if skipct > 0 and re.match('\.(endm|endmacro)' + ccce(g_ccid), b):
+            return (refsyms, skipct - 1)
+        if re.match('\.macro' + ccce(g_ccid), b):
+            return (refsyms, skipct + 1)
+        if skipct > 0:
+            return (refsyms, skipct)
+        if re.match('\.(type|size|syntax|arch|fpu)' + ccce(g_ccid), b):
+            return (refsyms, skipct)
+
+        (b1, b2, b3) = parse_iparts(b)
+        rtokens = parse_rexpr(b3)
+        if len(rtokens) > 1 and rtokens[1] == '.req':
+            # .req has atypical syntax; no symbol refs there anyway
+            return (refsyms, skipct)
+        for t in rtokens[1:]:
+            if token_type(t) == 'id' and looksglobal(t):
+                refsyms.add(t)
+        return (refsyms, skipct)
+
+    def finddef1(defsyms, (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line
+            return defsyms
+        (b1, b2, b3) = parse_iparts(b)
+        rtokens = parse_rexpr(b3)
+        if b1 != '':
+            defsyms.add(b1)
+        if len(rtokens) > 1 and rtokens[1] == '.req':
+            defsyms.add(rtokens[0])
+        return defsyms
+
+    def repl1((glosyms, result), (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line
+            result.append((a, b, c))
+            return (glosyms, result)
+        toglo = lambda s: 'Glo(' + s + ')'
+        (b1, b2, b3) = parse_iparts(b)
+        tokens = parse_expr(b3)
+
+        if b1 in glosyms:
+            b1 = toglo(b1)
+        for i in range(len(tokens)):
+            if token_type(tokens[i]) == 'id' and tokens[i] in glosyms:
+                tokens[i] = toglo(tokens[i])
+        result.append((a, b1 + b2 + ''.join(tokens), c))
+        return (glosyms, result)
+
+    reduce(findglo1, instrs, glosyms)
+    reduce(findref1, instrs, (refsyms, 0))
+    reduce(finddef1, instrs, defsyms)
+    glosyms |= (refsyms - defsyms)
+    reduce(repl1, instrs, (glosyms, result))
+    return result
+
+
+def local_symbols(instrs):
+    # The form of a local symbol differs between Linux assemblers and
+    # the Apple assember:
+    #
+    # Linux: .Lxxx
+    # Apple: Lxxx
+    #
+    # Change occurrences of local symbols to use the Loc() cpp macro
+    # defined in our prefix.
+    #
+    lsyms = set()
+    result = []
+
+    def find1 (lsyms, (a, b, c)):
+        mo = re.match('(\.L[^ \t:]*)[ \t]*:', b)
+        if mo:
+            lsyms.add(mo.group(1))
+        return lsyms
+
+    def repl1((lsyms, result), (a, b, c)):
+        matches = list(re.finditer('\.L[^ \t@:,+*/\-()]+', b))
+        if matches != []:
+            matches.reverse()
+            newb = b
+            for mo in matches:
+                if mo.group() in lsyms:
+                    newb = newb[0:mo.start()] + \
+                            'Loc(' + mo.group()[2:] + ')' + \
+                            newb[mo.end():]
+            result.append((a, newb, c))
+        else:
+            result.append((a, b, c))
+        return (lsyms, result)
+
+    reduce(find1, instrs, lsyms)
+    reduce(repl1, instrs, (lsyms, result))
+    return result
+
+
+def funtypes(instrs):
+    # Linux assemblers accept declarations like this:
+    #
+    #     .type  symbol, %function
+    #
+    # For Thumb functions, the Apple assembler wants to see:
+    #
+    #     .thumb_func symbol
+    #
+    # Handle this by converting declarations to this:
+    #
+    #     .funtype symbol
+    #
+    # Our prefix defines an appropriate .funtype macro for each
+    # environment.
+    #
+    result = []
+
+    def repl1(result, (a, b, c)):
+        mo = re.match('.type[ \t]+([^ \t,]*),[ \t]*%function', b)
+        if mo:
+            result.append((a, '.funtype  ' + mo.group(1), c))
+        else:
+            result.append((a, b, c))
+        return result
+
+    reduce(repl1, instrs, result)
+    return result
+
+
+def jump_tables(instrs):
+    # Jump tables for Linux assemblers often look like this:
+    #
+    #     tbh [pc, rM, lsl #1]
+    #     .short (.Labc-.)/2+0
+    #     .short (.Ldef-.)/2+1
+    #     .short (.Lghi-.)/2+2
+    #
+    # The Apple assembler disagrees about the meaning of this code,
+    # producing jump tables that don't work.  Convert to the following:
+    #
+    #     tbh [pc, rM, lsl #1]
+    # .LBxxx:
+    #     .short (.Labc-.LBxxx)/2
+    #     .short (.Ldef-.LBxxx)/2
+    #     .short (.Lghi-.LBxxx)/2
+    #
+    # In fact we just convert sequences of .short pseudo-ops of the
+    # right form.  There's no requirement that they follow a tbh
+    # instruction.
+    #
+    baselabs = []
+    result = []
+
+    def short_match(seq, op):
+        # Determine whether the op is a .short of the form that needs to
+        # be converted: .short (symbol-.)/2+k.  If so, return a pair
+        # containing the symbol and the value of k.  If not, return
+        # None.  The short can only be converted if there were at least
+        # k other .shorts in sequence before the current one.  A summary
+        # of the previous .shorts is in seq.
+        #
+        # (A real parser would do a better job, but this was quick to
+        # get working.)
+        #
+        sp = '([ \t]|/\*.*?\*/)*'              # space
+        sp1 = '([ \t]|/\*.*?\*/)+'             # at least 1 space
+        spe = '([ \t]|/\*.*?\*/|@[^\n]*)*$'    # end-of-instr space
+        expr_re0 = (
+            '\.short' + sp + '\(' + sp +       # .short (
+            '([^ \t+\-*/@()]+)' + sp +         # symbol
+            '-' + sp + '\.' + sp + '\)' + sp + # -.)
+            '/' + sp + '2' + spe               # /2 END
+        )
+        expr_re1 = (
+            '\.short' + sp + '\(' + sp +       # .short (
+            '([^ \t+\-*/@()]+)' + sp +         # symbol
+            '-' + sp + '\.' + sp + '\)' + sp + # -.)
+            '/' + sp + '2' + sp +              # /2
+            '\+' + sp +                        # +
+            '((0[xX])?[0-9]+)' + spe           # k END
+        )
+        expr_re2 = (
+            '\.short' + sp1 +                  # .short
+            '((0[xX])?[0-9]+)' + sp +          # k
+            '\+' + sp + '\(' + sp +            # +(
+            '([^ \t+\-*/@()]+)' + sp +         # symbol
+            '-' + sp + '\.' + sp + '\)' + sp + # -.)
+            '/' + sp + '2' + spe               # /2 END
+        )
+        mo = re.match(expr_re0, op)
+        if mo:
+            return(mo.group(3), 0)
+        mo = re.match(expr_re1, op)
+        if mo:
+            k = int(mo.group(11), 0)
+            if k > len(seq):
+                return None
+            return (mo.group(3), k)
+        mo = re.match(expr_re2, op)
+        if mo:
+            k = int(mo.group(2), 0)
+            if k > len(seq):
+                return None
+            return (mo.group(7), k)
+        return None
+
+    def conv1 ((baselabs, shortseq, label, result), (a, b, c)):
+        # Convert current instr (a,b,c) if it's a .short of the right
+        # form that spans a previous sequence of .shorts.
+        #
+        (b1, b2, b3) = parse_iparts(b)
+
+        if b3 == '':
+            # No operation: just note label if present.
+            result.append((a, b, c))
+            if re.match('\.L.', b1):
+                return (baselabs, shortseq, b1, result)
+            return (baselabs, shortseq, label, result)
+
+        if not re.match('.short[ \t]+[^ \t@]', b3):
+            # Not a .short: clear shortseq and label
+            result.append((a, b, c))
+            return (baselabs, [], '', result)
+
+        # We have a .short: figure out the label if any
+        if re.match('\.L', b1):
+            sl = b1
+        else:
+            sl = label
+
+        mpair = short_match(shortseq, b3)
+        if not mpair:
+            # A .short, but not of right form
+            shortseq.append((len(result), sl))
+            result.append((a, b, c))
+            return (baselabs, shortseq, '', result)
+
+        # OK, we have a .short to convert!
+        (sym, k) = mpair
+        shortseq.append((len(result), sl))
+
+        # Figure out base label (create one if necessary).
+        bx = len(shortseq) - 1 - k
+        bl = shortseq[bx][1]
+        if bl == '':
+            bl = g_basepfx + str(shortseq[bx][0])
+            shortseq[bx] = (shortseq[bx][0], bl)
+            baselabs.append(shortseq[bx])
+
+        op = '.short\t(' + sym + '-' + bl + ')/2'
+
+        result.append ((a, b1 + b2 + op, c))
+        return (baselabs, shortseq, '', result)
+
+    # Convert, accumulate result and new labels.
+    reduce(conv1, instrs, (baselabs, [], '', result))
+
+    # Add labels created here to the instruction stream.
+    baselabs.reverse()
+    for (ix, lab) in baselabs:
+        result[ix:0] = [('', lab + ':', '\n')]
+
+    # That does it
+    return result
+
+
+def dot_relative(instrs):
+    # The Apple assembler (or possibly the linker) has trouble with code
+    # that looks like this:
+    #
+    #     .word   .Label - . + 0x80000000
+    #     .word   0x1966
+    # .Label:
+    #     .word   0x1967
+    #
+    # One way to describe the problem is that the assembler marks the
+    # first .word for relocation when in fact it's an assembly-time
+    # constant.  Translate to the following form, which doesn't generate
+    # a relocation marking:
+    #
+    # DR0 =       .Label - . + 0x80000000
+    #     .word   DR0
+    #     .word   0x1966
+    # .Label:
+    #     .word   0x1967
+    #
+    prefix = 'DR'
+    pseudos = '(\.byte|\.short|\.word|\.long|\.quad)'
+    result = []
+
+    def tok_ok(t):
+        return t in ['.', '+', '-', '(', ')'] or \
+            token_type(t) in ['space', 'locid', 'number']
+
+    def dotrel_match(expr):
+        # Determine whether the expression is one that needs to be
+        # translated.
+        tokens = parse_expr(expr)
+        return forall(tok_ok, tokens) and \
+            exists(lambda t: token_type(t) == 'locid', tokens) and \
+            exists(lambda t: token_type(t) == 'number', tokens) and \
+            exists(lambda t: t == '-', tokens) and \
+            exists(lambda t: t == '.', tokens)
+
+    def conv1(result, (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line
+            result.append((a, b, c))
+        else:
+            (b1, b2, b3) = parse_iparts(b)
+            mo = re.match(pseudos + ccce(g_ccid), b3)
+            if mo:
+                p = mo.group(1)
+                expr = b3[len(p):]
+                if dotrel_match(expr):
+                    sym = prefix + str(len(result))
+                    instr = sym + ' =' + expr
+                    result.append(('', instr, '\n'))
+                    result.append((a, b1 + b2 + p + ' ' + sym, c))
+                else:
+                    result.append((a, b, c))
+            else:
+                result.append((a, b, c))
+        return result
+
+    reduce(conv1, instrs, result)
+    return result
+
+
+def read_input():
+    # Concatenate all the input files into a string.
+    #
+    def fnl(s):
+        if s == '' or s[-1] == '\n':
+            return s
+        else:
+            return s + '\n'
+
+    if len(sys.argv) < 2:
+        return fnl(sys.stdin.read())
+    else:
+        input = ""
+        for f in sys.argv[1:]:
+            # allow global symbols to be enabled or disabled, eg:
+            #   --global=foo,!bar
+            #  foo is forced to be global
+            #  bar is forced to be non-global
+            if f.startswith('--global='):
+                glist = f[9:].split(',')
+                for g in glist:
+                    if g.startswith('!'):
+                        initial_defsyms.append(g[1:])
+                    else:
+                        initial_glosyms.append(g)
+            elif f.startswith('--stdin'):
+                input = input + fnl(sys.stdin.read())
+            else:
+                try:
+                    fd = open(f)
+                    input = input + fnl(fd.read())
+                    fd.close()
+                except:
+                    sys.stderr.write('arm-as-to-ios: cannot open ' + f + '\n')
+        return input
+
+
+def parse_instrs(s):
+    # Parse the string into assembly instructions, also noting C
+    # preprocessor lines.  Each instruction is represented as a triple:
+    # (space/comments, instruction, end).  The end is either ';' or
+    # '\n'.
+    #
+    def goodmo(mo):
+        if mo == None:
+            # Should never happen
+            sys.stderr.write('arm-as-to-ios: internal parsing error\n')
+            sys.exit(1)
+
+    cpp_re = '([ \t]*)(#([^\n]*\\\\\n)*[^\n]*[^\\\\\n])\n'
+    comment_re = '[ \t]*#[^\n]*'
+    instr_re = (
+        '(([ \t]|/\*.*?\*/|@[^\n]*)*)'  # Spaces & comments
+        '(([ \t]|/\*.*?\*/|[^;\n])*)'   # "Instruction"
+        '([;\n])'                       # End
+    )
+    instrs = []
+    while s != '':
+        if re.match('[ \t]*#[ \t]*(if|ifdef|elif|else|endif|define)', s):
+            mo = re.match(cpp_re, s)
+            goodmo(mo)
+            instrs.append((mo.group(1), mo.group(2), '\n'))
+        elif re.match('[ \t]*#', s):
+            mo = re.match(comment_re, s)
+            goodmo(mo)
+            instrs.append((mo.group(0), '', '\n'))
+        else:
+            mo = re.match(instr_re, s, re.DOTALL)
+            goodmo(mo)
+            instrs.append((mo.group(1), mo.group(3), mo.group(5)))
+        s = s[len(mo.group(0)):]
+    return instrs
+
+
+def parse_iparts(i):
+    # Parse an instruction into smaller parts, returning a triple of
+    # strings (label, colon, operation).  The colon part also contains
+    # any surrounding spaces and comments (making the label and the
+    # operation cleaner to process).
+    #
+    # (Caller warrants that the given string doesn't start with space or
+    # a comment.  This is true for strings returned by the instruction
+    # parser.)
+    #
+    lab_re = (
+        '([^ \t:/@]+)'                  # Label
+        '(([ \t]|/\*.*?\*/|@[^\n]*)*)'  # Spaces & comments
+        ':'                             # Colon
+        '(([ \t]|/\*.*?\*/|@[^\n]*)*)'  # Spaces & comments
+        '([^\n]*)'                      # Operation
+    )
+
+    if len(i) > 0 and i[0] == '#':
+        # C preprocessor line; treat as operation.
+        return ('', '', i)
+    mo = re.match(lab_re, i)
+    if mo:
+        return (mo.group(1), mo.group(2) + ':' + mo.group(4), mo.group(6))
+    # No label, just an operation
+    return ('', '', i)
+
+
+def parse_expr(s):
+    # Parse a string into a sequence of tokens.  A segment of white
+    # space (including comments) is treated as a token, so that the
+    # tokens can be reassembled into the string again.
+    #
+    result = []
+    while s != '':
+        mo = re.match('([ \t]|/\*.*?\*/|@.*)+', s)
+        if not mo:
+            # Glo(...) and Loc(...) are single tokens
+            mo = re.match('(Glo|Loc)\([^()]*\)', s)
+        if not mo:
+            mo = re.match('"([^\\\\"]|\\\\.)*"', s)
+        if not mo:
+            mo = re.match(g_ccid0 + g_ccid + '*', s)
+        if not mo:
+            mo = re.match('[0-9]+[bf]', s)
+        if not mo:
+            mo = re.match('0[Xx][0-9a-fA-F]+|[0-9]+', s)
+        if not mo:
+            mo = re.match('.', s)
+        result.append(mo.group(0))
+        s = s[len(mo.group(0)):]
+    return result
+
+
+def parse_rexpr(s):
+    # Like parse_expr(), but return only "real" tokens, not the
+    # intervening space.
+    #
+    return filter(lambda t: token_type(t) != 'space', parse_expr(s))
+
+
+def token_type(t):
+    # Determine the type of a token.  Caller warrants that it was
+    # returned by parse_expr() or parse_rexpr().
+    #
+    if re.match('[ \t]|/\*|@', t):
+        return 'space'
+    if re.match('Glo\(', t):
+        return 'gloid'
+    if re.match('Loc\(', t):
+        return 'locid'
+    if re.match('"', t):
+        return 'string'
+    if re.match(g_ccid0, t):
+        return 'id'
+    if re.match('[0-9]+[bf]', t):
+        return 'label'
+    if re.match('[0-9]', t):
+        return 'number'
+    return t # Sui generis
+
+
+def debug_parse(a, b, c):
+    # Show results of instuction stream parse.
+    #
+    (b1, b2, b3) = parse_iparts(b)
+    newb = '{' + b1 + '}' + '{' + b2 + '}' + '{' + b3 + '}'
+    sys.stdout.write('{' + a + '}' + newb + c)
+
+
+def main():
+    instrs = parse_instrs(read_input())
+    instrs = explicit_address_loads(instrs)
+    instrs = funtypes(instrs)
+    instrs = jump_tables(instrs)
+    instrs = global_symbols(instrs)
+    instrs = local_symbols(instrs)
+    instrs = dot_relative(instrs)
+    instrs = add_prefix(instrs)
+    for (a, b, c) in instrs:
+       sys.stdout.write(a + b + c)
+
+
+main()
@@ -0,0 +1,730 @@
+#!/usr/bin/env python
+#
+# arm-as-to-ios     Modify ARM assembly code for the iOS assembler
+#
+# Copyright (c) 2012 Psellos   http://psellos.com/
+# Licensed under the MIT License:
+#     http://www.opensource.org/licenses/mit-license.php
+#
+# Resources for running OCaml on iOS: http://psellos.com/ocaml/
+#
+import sys
+import re
+
+VERSION = '1.4.0'
+
+# Character classes for expression lexing.
+#
+g_ccid0 = '[$.A-Z_a-z\x80-\xff]'      # Beginning of id
+g_ccid =  '[$.0-9A-Z_a-z\x80-\xff]'   # Later in id
+def ccc(cc):                          # Complement the class
+    if cc[1] == '^':
+        return cc[0] + cc[2:]
+    return cc[0] + '^' + cc[1:]
+def ccce(cc):                         # Complement the class, include EOL
+    return '(?:' + ccc(cc) + '|$)'
+
+# Prefixes for pooled symbol labels and jump table base labels.  They're
+# in the space of Linux assembler local symbols.  Later rules will
+# modify them to the Loc() form.
+#
+g_poolpfx = '.LP'
+g_basepfx = '.LB'
+
+
+def exists(p, l):
+    for l1 in l:
+        if p(l1):
+            return True
+    return False
+
+
+def forall(p, l):
+    for l1 in l:
+        if not p(l1):
+            return False
+    return True
+
+
+def add_prefix(instrs):
+    # Add compatibility macros for all systems, plus hardware
+    # definitions and compatibility macros for iOS.
+    #
+    # All systems:
+    #
+    # Glo()     cpp macro for making global symbols (xxx vs _xxx)
+    # Loc()     cpp macro for making local symbols (.Lxxx vs Lxxx)
+    # .funtype  Expands to .thumb_func for iOS armv7 (null for armv6)
+    #           Expands to .type %function for others
+    #
+    # iOS:
+    #
+    # .machine  armv6/armv7
+    # .thumb    (for armv7)
+    # cbz       Expands to cmp/beq for armv6 (Thumb-only instr)
+    # .type     Not supported by Apple assembler
+    # .size     Not supported by Apple assembler
+    #
+    defre = '#[ \t]*if.*def.*SYS'  # Add new defs near first existing ones
+    skipre = '$|\.syntax[ \t]'     # Skip comment lines (and .syntax)
+
+    for i in range(len(instrs)):
+        if re.match(defre, instrs[i][1]):
+            break
+    else:
+        i = 0
+    for i in range(i, len(instrs)):
+        if not re.match(skipre, instrs[i][1]):
+            break
+    instrs[i:0] = [
+        ('', '', '\n'),
+        ('/* Apple compatibility macros */', '', '\n'),
+        ('', '#if defined(SYS_macosx)', '\n'),
+        ('', '#define Glo(s) _##s', '\n'),
+        ('', '#define Loc(s) L##s', '\n'),
+        ('', '#if defined(MODEL_armv6)', '\n'),
+        ('        ', '.machine  armv6', '\n'),
+        ('        ', '.macro  .funtype', '\n'),
+        ('        ', '.endm', '\n'),
+        ('        ', '.macro  cbz', '\n'),
+        ('        ', 'cmp     $0, #0', '\n'),
+        ('        ', 'beq     $1', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#else', '\n'),
+        ('        ', '.machine  armv7', '\n'),
+        ('        ', '.thumb', '\n'),
+        ('        ', '.macro  .funtype', '\n'),
+        ('        ', '.thumb_func $0', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#endif', '\n'),
+        ('        ', '.macro  .type', '\n'),
+        ('        ', '.endm', '\n'),
+        ('        ', '.macro  .size', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#else', '\n'),
+        ('', '#define Glo(s) s', '\n'),
+        ('', '#define Loc(s) .L##s', '\n'),
+        ('        ', '.macro  .funtype symbol', '\n'),
+        ('        ', '.type  \\symbol, %function', '\n'),
+        ('        ', '.endm', '\n'),
+        ('', '#endif', '\n'),
+        ('/* End Apple compatibility macros */', '', '\n'),
+        ('', '', '\n')
+    ]
+    return instrs
+
+
+# Regular expression for modified ldr lines
+#
+g_ldre = '(ldr[ \t][^,]*,[ \t]*)=(([^ \t\n@,/]|/(?!\*))*)(.*)'
+
+
+def explicit_address_loads(instrs):
+    # Linux assemblers allow the following:
+    #
+    #     ldr rM, =symbol
+    #
+    # which loads rM with [mov] (immediately) if possible, or creates an
+    # entry in memory for the symbol value and loads it PC-relatively
+    # with [ldr].
+    #
+    # The Apple assembler doesn't seem to support this notation.  If the
+    # value is a suitable constant, it emits a valid [mov].  Otherwise
+    # it seems to emit an invalid [ldr] that always generates an error.
+    # (At least I have not been able to make it work).  So, change uses
+    # of =symbol to explicit PC-relative loads.
+    #
+    # This requires a pool containing the addresses to be loaded.  For
+    # now, we just keep track of it ourselves and emit it into the text
+    # segment at the end of the file.
+    #
+    syms = {}
+    result = []
+
+    def repl1((syms, result), (a, b, c)):
+        global g_poolpfx
+        global g_ldre
+        (b1, b2, b3) = parse_iparts(b)
+        mo = re.match(g_ldre, b3, re.DOTALL)
+        if mo:
+            if mo.group(2) not in syms:
+                syms[mo.group(2)] = len(syms)
+            psym = mo.group(2)
+            if psym[0:2] == '.L':
+                psym = psym[2:]
+            newb3 = mo.group(1) + g_poolpfx + psym + mo.group(4)
+            result.append((a, b1 + b2 + newb3, c))
+        else:
+            result.append((a, b, c))
+        return (syms, result)
+
+    def pool1(result, s):
+        global g_poolpfx
+        psym = s
+        if psym[0:2] == '.L':
+            psym = psym[2:]
+        result.append(('', g_poolpfx + psym + ':', '\n'))
+        result.append(('        ', '.long ' + s, '\n'))
+        return result
+
+    reduce(repl1, instrs, (syms, result))
+    if len(syms) > 0:
+        result.append(('', '', '\n'))
+        result.append(('/* Pool of addresses loaded into registers */',
+                        '', '\n'))
+        result.append(('', '', '\n'))
+        result.append(('        ', '.text', '\n'))
+        result.append(('        ', '.align 2', '\n'))
+        reduce(pool1, sorted(syms, key=syms.get), result)
+    return result
+
+
+def global_symbols(instrs):
+    # The form of a global symbol differs between Linux assemblers and
+    # the Apple assember:
+    #
+    # Linux: xxx
+    # Apple: _xxx
+    #
+    # Change occurrences of global symbols to use the Glo() cpp macro
+    # defined in our prefix.
+    #
+    # We consider a symbol to be global if:
+    #
+    # a.  It appears in a .globl declaration; or
+    # b.  It is referenced, has global form, and is not defined
+    #
+    glosyms = set()
+    refsyms = set()
+    defsyms = set()
+    result = []
+
+    def findglo1 (glosyms, (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line; nothing to do
+            return glosyms
+        (b1, b2, b3) = parse_iparts(b)
+        mo = re.match('(\.globl)' + ccce(g_ccid), b3)
+        if mo:
+            tokens = parse_expr(b3[len(mo.group(1)):])
+            if forall(lambda t: token_type(t) in ['space', 'id', ','], tokens):
+                for t in tokens:
+                    if token_type(t) == 'id':
+                        glosyms.add(t)
+        return glosyms
+
+    def findref1 ((refsyms, skipct), (a, b, c)):
+
+        def looksglobal(s):
+            if re.match('(r|a|v|p|c|cr|f|s|d|q|mvax|wcgr)[0-9]+$', s, re.I):
+                return False # numbered registers
+            if re.match('(wr|sb|sl|fp|ip|sp|lr|pc)$', s, re.I):
+                return False # named registers
+            if re.match('(fpsid|fpscr|fpexc|mvfr1|mvfr0)$', s, re.I):
+                return False # more named registers
+            if re.match('(mvf|mvd|mvfx|mvdx|dspsc)$', s, re.I):
+                return False # even more named registers
+            if re.match('(wcid|wcon|wcssf|wcasf|acc)$', s, re.I):
+                return False # even more named registers
+            if re.match('\.$|\.L|[0-9]|#', s):
+                return False # dot, local symbol, or number
+            if re.match('(asl|lsl|lsr|asr|ror|rrx)$', s, re.I):
+                return False # shift names
+            return True
+
+        if re.match('#', b):
+            # Preprocessor line; nothing to do
+            return (refsyms, skipct)
+
+        # Track nesting of .macro/.endm.  For now, we don't look for
+        # global syms in macro defs.  (Avoiding scoping probs etc.)
+        #
+        if skipct > 0 and re.match('\.(endm|endmacro)' + ccce(g_ccid), b):
+            return (refsyms, skipct - 1)
+        if re.match('\.macro' + ccce(g_ccid), b):
+            return (refsyms, skipct + 1)
+        if skipct > 0:
+            return (refsyms, skipct)
+        if re.match('\.(type|size|syntax|arch|fpu)' + ccce(g_ccid), b):
+            return (refsyms, skipct)
+
+        (b1, b2, b3) = parse_iparts(b)
+        rtokens = parse_rexpr(b3)
+        if len(rtokens) > 1 and rtokens[1] == '.req':
+            # .req has atypical syntax; no symbol refs there anyway
+            return (refsyms, skipct)
+        for t in rtokens[1:]:
+            if token_type(t) == 'id' and looksglobal(t):
+                refsyms.add(t)
+        return (refsyms, skipct)
+
+    def finddef1(defsyms, (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line
+            return defsyms
+        (b1, b2, b3) = parse_iparts(b)
+        rtokens = parse_rexpr(b3)
+        if b1 != '':
+            defsyms.add(b1)
+        if len(rtokens) > 1 and rtokens[1] == '.req':
+            defsyms.add(rtokens[0])
+        return defsyms
+
+    def repl1((glosyms, result), (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line
+            result.append((a, b, c))
+            return (glosyms, result)
+        toglo = lambda s: 'Glo(' + s + ')'
+        (b1, b2, b3) = parse_iparts(b)
+        tokens = parse_expr(b3)
+
+        if b1 in glosyms:
+            b1 = toglo(b1)
+        for i in range(len(tokens)):
+            if token_type(tokens[i]) == 'id' and tokens[i] in glosyms:
+                tokens[i] = toglo(tokens[i])
+        result.append((a, b1 + b2 + ''.join(tokens), c))
+        return (glosyms, result)
+
+    reduce(findglo1, instrs, glosyms)
+    reduce(findref1, instrs, (refsyms, 0))
+    reduce(finddef1, instrs, defsyms)
+    glosyms |= (refsyms - defsyms)
+    reduce(repl1, instrs, (glosyms, result))
+    return result
+
+
+def local_symbols(instrs):
+    # The form of a local symbol differs between Linux assemblers and
+    # the Apple assember:
+    #
+    # Linux: .Lxxx
+    # Apple: Lxxx
+    #
+    # Change occurrences of local symbols to use the Loc() cpp macro
+    # defined in our prefix.
+    #
+    lsyms = set()
+    result = []
+
+    def find1 (lsyms, (a, b, c)):
+        mo = re.match('(\.L[^ \t:]*)[ \t]*:', b)
+        if mo:
+            lsyms.add(mo.group(1))
+        return lsyms
+
+    def repl1((lsyms, result), (a, b, c)):
+        matches = list(re.finditer('\.L[^ \t@:,+*/\-()]+', b))
+        if matches != []:
+            matches.reverse()
+            newb = b
+            for mo in matches:
+                if mo.group() in lsyms:
+                    newb = newb[0:mo.start()] + \
+                            'Loc(' + mo.group()[2:] + ')' + \
+                            newb[mo.end():]
+            result.append((a, newb, c))
+        else:
+            result.append((a, b, c))
+        return (lsyms, result)
+
+    reduce(find1, instrs, lsyms)
+    reduce(repl1, instrs, (lsyms, result))
+    return result
+
+
+def funtypes(instrs):
+    # Linux assemblers accept declarations like this:
+    #
+    #     .type  symbol, %function
+    #
+    # For Thumb functions, the Apple assembler wants to see:
+    #
+    #     .thumb_func symbol
+    #
+    # Handle this by converting declarations to this:
+    #
+    #     .funtype symbol
+    #
+    # Our prefix defines an appropriate .funtype macro for each
+    # environment.
+    #
+    result = []
+
+    def repl1(result, (a, b, c)):
+        mo = re.match('.type[ \t]+([^ \t,]*),[ \t]*%function', b)
+        if mo:
+            result.append((a, '.funtype  ' + mo.group(1), c))
+        else:
+            result.append((a, b, c))
+        return result
+
+    reduce(repl1, instrs, result)
+    return result
+
+
+def jump_tables(instrs):
+    # Jump tables for Linux assemblers often look like this:
+    #
+    #     tbh [pc, rM, lsl #1]
+    #     .short (.Labc-.)/2+0
+    #     .short (.Ldef-.)/2+1
+    #     .short (.Lghi-.)/2+2
+    #
+    # The Apple assembler disagrees about the meaning of this code,
+    # producing jump tables that don't work.  Convert to the following:
+    #
+    #     tbh [pc, rM, lsl #1]
+    # .LBxxx:
+    #     .short (.Labc-.LBxxx)/2
+    #     .short (.Ldef-.LBxxx)/2
+    #     .short (.Lghi-.LBxxx)/2
+    #
+    # In fact we just convert sequences of .short pseudo-ops of the
+    # right form.  There's no requirement that they follow a tbh
+    # instruction.
+    #
+    baselabs = []
+    result = []
+
+    def short_match(seq, op):
+        # Determine whether the op is a .short of the form that needs to
+        # be converted: .short (symbol-.)/2+k.  If so, return a pair
+        # containing the symbol and the value of k.  If not, return
+        # None.  The short can only be converted if there were at least
+        # k other .shorts in sequence before the current one.  A summary
+        # of the previous .shorts is in seq.
+        #
+        # (A real parser would do a better job, but this was quick to
+        # get working.)
+        #
+        sp = '([ \t]|/\*.*?\*/)*'              # space
+        sp1 = '([ \t]|/\*.*?\*/)+'             # at least 1 space
+        spe = '([ \t]|/\*.*?\*/|@[^\n]*)*$'    # end-of-instr space
+        expr_re0 = (
+            '\.short' + sp + '\(' + sp +       # .short (
+            '([^ \t+\-*/@()]+)' + sp +         # symbol
+            '-' + sp + '\.' + sp + '\)' + sp + # -.)
+            '/' + sp + '2' + spe               # /2 END
+        )
+        expr_re1 = (
+            '\.short' + sp + '\(' + sp +       # .short (
+            '([^ \t+\-*/@()]+)' + sp +         # symbol
+            '-' + sp + '\.' + sp + '\)' + sp + # -.)
+            '/' + sp + '2' + sp +              # /2
+            '\+' + sp +                        # +
+            '((0[xX])?[0-9]+)' + spe           # k END
+        )
+        expr_re2 = (
+            '\.short' + sp1 +                  # .short
+            '((0[xX])?[0-9]+)' + sp +          # k
+            '\+' + sp + '\(' + sp +            # +(
+            '([^ \t+\-*/@()]+)' + sp +         # symbol
+            '-' + sp + '\.' + sp + '\)' + sp + # -.)
+            '/' + sp + '2' + spe               # /2 END
+        )
+        mo = re.match(expr_re0, op)
+        if mo:
+            return(mo.group(3), 0)
+        mo = re.match(expr_re1, op)
+        if mo:
+            k = int(mo.group(11), 0)
+            if k > len(seq):
+                return None
+            return (mo.group(3), k)
+        mo = re.match(expr_re2, op)
+        if mo:
+            k = int(mo.group(2), 0)
+            if k > len(seq):
+                return None
+            return (mo.group(7), k)
+        return None
+
+    def conv1 ((baselabs, shortseq, label, result), (a, b, c)):
+        # Convert current instr (a,b,c) if it's a .short of the right
+        # form that spans a previous sequence of .shorts.
+        #
+        (b1, b2, b3) = parse_iparts(b)
+
+        if b3 == '':
+            # No operation: just note label if present.
+            result.append((a, b, c))
+            if re.match('\.L.', b1):
+                return (baselabs, shortseq, b1, result)
+            return (baselabs, shortseq, label, result)
+
+        if not re.match('.short[ \t]+[^ \t@]', b3):
+            # Not a .short: clear shortseq and label
+            result.append((a, b, c))
+            return (baselabs, [], '', result)
+
+        # We have a .short: figure out the label if any
+        if re.match('\.L', b1):
+            sl = b1
+        else:
+            sl = label
+
+        mpair = short_match(shortseq, b3)
+        if not mpair:
+            # A .short, but not of right form
+            shortseq.append((len(result), sl))
+            result.append((a, b, c))
+            return (baselabs, shortseq, '', result)
+
+        # OK, we have a .short to convert!
+        (sym, k) = mpair
+        shortseq.append((len(result), sl))
+
+        # Figure out base label (create one if necessary).
+        bx = len(shortseq) - 1 - k
+        bl = shortseq[bx][1]
+        if bl == '':
+            bl = g_basepfx + str(shortseq[bx][0])
+            shortseq[bx] = (shortseq[bx][0], bl)
+            baselabs.append(shortseq[bx])
+
+        op = '.short\t(' + sym + '-' + bl + ')/2'
+
+        result.append ((a, b1 + b2 + op, c))
+        return (baselabs, shortseq, '', result)
+
+    # Convert, accumulate result and new labels.
+    reduce(conv1, instrs, (baselabs, [], '', result))
+
+    # Add labels created here to the instruction stream.
+    baselabs.reverse()
+    for (ix, lab) in baselabs:
+        result[ix:0] = [('', lab + ':', '\n')]
+
+    # That does it
+    return result
+
+
+def dot_relative(instrs):
+    # The Apple assembler (or possibly the linker) has trouble with code
+    # that looks like this:
+    #
+    #     .word   .Label - . + 0x80000000
+    #     .word   0x1966
+    # .Label:
+    #     .word   0x1967
+    #
+    # One way to describe the problem is that the assembler marks the
+    # first .word for relocation when in fact it's an assembly-time
+    # constant.  Translate to the following form, which doesn't generate
+    # a relocation marking:
+    #
+    # DR0 =       .Label - . + 0x80000000
+    #     .word   DR0
+    #     .word   0x1966
+    # .Label:
+    #     .word   0x1967
+    #
+    prefix = 'DR'
+    pseudos = '(\.byte|\.short|\.word|\.long|\.quad)'
+    result = []
+
+    def tok_ok(t):
+        return t in ['.', '+', '-', '(', ')'] or \
+            token_type(t) in ['space', 'locid', 'number']
+
+    def dotrel_match(expr):
+        # Determine whether the expression is one that needs to be
+        # translated.
+        tokens = parse_expr(expr)
+        return forall(tok_ok, tokens) and \
+            exists(lambda t: token_type(t) == 'locid', tokens) and \
+            exists(lambda t: token_type(t) == 'number', tokens) and \
+            exists(lambda t: t == '-', tokens) and \
+            exists(lambda t: t == '.', tokens)
+
+    def conv1(result, (a, b, c)):
+        if re.match('#', b):
+            # Preprocessor line
+            result.append((a, b, c))
+        else:
+            (b1, b2, b3) = parse_iparts(b)
+            mo = re.match(pseudos + ccce(g_ccid), b3)
+            if mo:
+                p = mo.group(1)
+                expr = b3[len(p):]
+                if dotrel_match(expr):
+                    sym = prefix + str(len(result))
+                    instr = sym + ' =' + expr
+                    result.append(('', instr, '\n'))
+                    result.append((a, b1 + b2 + p + ' ' + sym, c))
+                else:
+                    result.append((a, b, c))
+            else:
+                result.append((a, b, c))
+        return result
+
+    reduce(conv1, instrs, result)
+    return result
+
+
+def read_input():
+    # Concatenate all the input files into a string.
+    #
+    def fnl(s):
+        if s == '' or s[-1] == '\n':
+            return s
+        else:
+            return s + '\n'
+
+    if len(sys.argv) < 2:
+        return fnl(sys.stdin.read())
+    else:
+        input = ""
+        for f in sys.argv[1:]:
+            try:
+                fd = open(f)
+                input = input + fnl(fd.read())
+                fd.close()
+            except:
+                sys.stderr.write('arm-as-to-ios: cannot open ' + f + '\n')
+        return input
+
+
+def parse_instrs(s):
+    # Parse the string into assembly instructions, also noting C
+    # preprocessor lines.  Each instruction is represented as a triple:
+    # (space/comments, instruction, end).  The end is either ';' or
+    # '\n'.
+    #
+    def goodmo(mo):
+        if mo == None:
+            # Should never happen
+            sys.stderr.write('arm-as-to-ios: internal parsing error\n')
+            sys.exit(1)
+
+    cpp_re = '([ \t]*)(#([^\n]*\\\\\n)*[^\n]*[^\\\\\n])\n'
+    comment_re = '[ \t]*#[^\n]*'
+    instr_re = (
+        '(([ \t]|/\*.*?\*/|@[^\n]*)*)'  # Spaces & comments
+        '(([ \t]|/\*.*?\*/|[^;\n])*)'   # "Instruction"
+        '([;\n])'                       # End
+    )
+    instrs = []
+    while s != '':
+        if re.match('[ \t]*#[ \t]*(if|ifdef|elif|else|endif|define)', s):
+            mo = re.match(cpp_re, s)
+            goodmo(mo)
+            instrs.append((mo.group(1), mo.group(2), '\n'))
+        elif re.match('[ \t]*#', s):
+            mo = re.match(comment_re, s)
+            goodmo(mo)
+            instrs.append((mo.group(0), '', '\n'))
+        else:
+            mo = re.match(instr_re, s, re.DOTALL)
+            goodmo(mo)
+            instrs.append((mo.group(1), mo.group(3), mo.group(5)))
+        s = s[len(mo.group(0)):]
+    return instrs
+
+
+def parse_iparts(i):
+    # Parse an instruction into smaller parts, returning a triple of
+    # strings (label, colon, operation).  The colon part also contains
+    # any surrounding spaces and comments (making the label and the
+    # operation cleaner to process).
+    #
+    # (Caller warrants that the given string doesn't start with space or
+    # a comment.  This is true for strings returned by the instruction
+    # parser.)
+    #
+    lab_re = (
+        '([^ \t:/@]+)'                  # Label
+        '(([ \t]|/\*.*?\*/|@[^\n]*)*)'  # Spaces & comments
+        ':'                             # Colon
+        '(([ \t]|/\*.*?\*/|@[^\n]*)*)'  # Spaces & comments
+        '([^\n]*)'                      # Operation
+    )
+
+    if len(i) > 0 and i[0] == '#':
+        # C preprocessor line; treat as operation.
+        return ('', '', i)
+    mo = re.match(lab_re, i)
+    if mo:
+        return (mo.group(1), mo.group(2) + ':' + mo.group(4), mo.group(6))
+    # No label, just an operation
+    return ('', '', i)
+
+
+def parse_expr(s):
+    # Parse a string into a sequence of tokens.  A segment of white
+    # space (including comments) is treated as a token, so that the
+    # tokens can be reassembled into the string again.
+    #
+    result = []
+    while s != '':
+        mo = re.match('([ \t]|/\*.*?\*/|@.*)+', s)
+        if not mo:
+            # Glo(...) and Loc(...) are single tokens
+            mo = re.match('(Glo|Loc)\([^()]*\)', s)
+        if not mo:
+            mo = re.match('"([^\\\\"]|\\\\.)*"', s)
+        if not mo:
+            mo = re.match(g_ccid0 + g_ccid + '*', s)
+        if not mo:
+            mo = re.match('[0-9]+[bf]', s)
+        if not mo:
+            mo = re.match('0[Xx][0-9a-fA-F]+|[0-9]+', s)
+        if not mo:
+            mo = re.match('.', s)
+        result.append(mo.group(0))
+        s = s[len(mo.group(0)):]
+    return result
+
+
+def parse_rexpr(s):
+    # Like parse_expr(), but return only "real" tokens, not the
+    # intervening space.
+    #
+    return filter(lambda t: token_type(t) != 'space', parse_expr(s))
+
+
+def token_type(t):
+    # Determine the type of a token.  Caller warrants that it was
+    # returned by parse_expr() or parse_rexpr().
+    #
+    if re.match('[ \t]|/\*|@', t):
+        return 'space'
+    if re.match('Glo\(', t):
+        return 'gloid'
+    if re.match('Loc\(', t):
+        return 'locid'
+    if re.match('"', t):
+        return 'string'
+    if re.match(g_ccid0, t):
+        return 'id'
+    if re.match('[0-9]+[bf]', t):
+        return 'label'
+    if re.match('[0-9]', t):
+        return 'number'
+    return t # Sui generis
+
+
+def debug_parse(a, b, c):
+    # Show results of instuction stream parse.
+    #
+    (b1, b2, b3) = parse_iparts(b)
+    newb = '{' + b1 + '}' + '{' + b2 + '}' + '{' + b3 + '}'
+    sys.stdout.write('{' + a + '}' + newb + c)
+
+
+def main():
+    instrs = parse_instrs(read_input())
+    instrs = explicit_address_loads(instrs)
+    instrs = funtypes(instrs)
+    instrs = jump_tables(instrs)
+    instrs = global_symbols(instrs)
+    instrs = local_symbols(instrs)
+    instrs = dot_relative(instrs)
+    instrs = add_prefix(instrs)
+    for (a, b, c) in instrs:
+       sys.stdout.write(a + b + c)
+
+
+main()
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+set -e
+if [ -z "$O3" ]; then
+    echo O3 var must point to ovpn3 tree
+    exit 1
+fi
+if [ -z "$TARGET" ]; then
+    echo TARGET var must be defined
+    exit 1
+fi
+
+[ -z "$DL" ] && DL=~/Downloads
+
+. $O3/core/vars/vars-$TARGET
+. $O3/core/deps/lib-versions
+
+# Build ASM files with clang 3.0
+if [ "$APPLE_FAMILY" = "1" ]; then
+    GCC_AS_CMD=$HOME/clang3/clang
+    if ! [ -f "$GCC_AS_CMD" ]; then
+	echo "clang 3.0 binary must be present in $GCC_AS_CMD to assemble ARM crypto algorithms"
+	exit 1
+    fi
+fi
+
+DEST=minicrypto/minicrypto-$PLATFORM
+
+GLOBAL_COMPILE_FLAGS="$PLATFORM_FLAGS $OTHER_COMPILER_FLAGS $LIB_OPT_LEVEL $LIB_FPIC -DSHA1_ASM -DBF_PTR -DOPENSSL_VERSION_PTEXT= -D__LP32__"
+
+[ -z "$GCC_CMD" ] && GCC_CMD=gcc
+[ -z "$GCC_AS_CMD" ] && GCC_AS_CMD="$GCC_CMD"
+[ -z "$AR_CMD" ] && AR_CMD=ar
+# the directory where this script lives
+H=$O3/core/deps/minicrypto
+
+if [ "$NO_WIPE" != "1" ]; then
+    # unzip OpenSSL
+    rm -rf $OPENSSL_VERSION
+    tar xfz $DL/$OPENSSL_VERSION.tar.gz
+fi
+
+OPENSSL_DIR=$(pwd)/$OPENSSL_VERSION
+
+# make build directory
+mkdir -p minicrypto
+rm -rf minicrypto/minicrypto-$PLATFORM
+mkdir -p minicrypto/minicrypto-$PLATFORM/build.tmp
+cd minicrypto/minicrypto-$PLATFORM/build.tmp
+mkdir openssl
+
+# copy files from OpenSSL tree
+
+# ARM
+cp $OPENSSL_DIR/crypto/arm_arch.h .
+
+# SHA general
+cp $OPENSSL_DIR/crypto/md32_common.h .
+cp $OPENSSL_DIR/crypto/sha/sha.h openssl
+
+# AES
+cp $OPENSSL_DIR/crypto/aes/asm/aes-armv4.pl .
+
+# SHA1
+cp $OPENSSL_DIR/crypto/sha/asm/sha1-armv4-large.pl .
+cp $OPENSSL_DIR/crypto/sha/sha_locl.h .
+cp $OPENSSL_DIR/crypto/sha/sha1dgst.c .
+
+# SHA2
+cp $OPENSSL_DIR/crypto/sha/sha256.c .
+cp $OPENSSL_DIR/crypto/sha/asm/sha256-armv4.pl .
+
+# SHA4
+cp $OPENSSL_DIR/crypto/sha/sha512.c .
+cp $OPENSSL_DIR/crypto/sha/asm/sha512-armv4.pl .
+
+# note that OPENSSL_cleanse is not used by any
+# of the functions we are interested in
+cat >openssl/crypto.h <<EOF
+#define fips_md_init(alg) fips_md_init_ctx(alg, alg)
+#define fips_md_init_ctx(alg, cx) int alg##_Init(cx##_CTX *c)
+#define OPENSSL_cleanse(ptr, len) memset((ptr), 0, (len))
+EOF
+
+# irrelevant headers
+touch openssl/e_os2.h
+touch openssl/opensslconf.h
+touch openssl/opensslv.h
+touch aes_locl.h
+touch cryptlib.h
+touch crypto.h
+
+# patches
+patch <$H/aes-armv4.pl.patch
+patch <$H/sha512-armv4.pl.patch
+perl -pi -e 's/private_//g' aes-armv4.pl
+for f in aes-armv4.pl sha256-armv4.pl sha512-armv4.pl ; do # armv4cpuid.pre
+    perl -pi -e 's/^(\.code.*)$/\/* \1 *\//' $f
+done
+
+# build C files
+for f in *.c ; do
+    COMPILE_FLAGS="-Wno-unused-value"
+    CMD="$GCC_CMD $GLOBAL_COMPILE_FLAGS $COMPILE_FLAGS -I. -c $f"
+    echo $CMD
+    $CMD
+done
+
+# build armv4cpuid.S
+#$O3/core/deps/minicrypto/arm-as-to-ios <armv4cpuid.pre >armv4cpuid.S
+#CMD="$GCC_AS_CMD $GLOBAL_COMPILE_FLAGS -DSYS_macosx -DNO_THUMB -c armv4cpuid.S"
+#echo $CMD
+#$CMD
+
+# build the ASM files given as perl source
+for f in *.pl ; do
+    bn=${f%%.pl}
+    S=$bn.S
+    COMPILE_FLAGS=""
+    CVT_FLAGS=""
+    if [ "$APPLE_FAMILY" = "1" ]; then
+	COMPILE_FLAGS="$COMPILE_FLAGS -DNO_THUMB"
+	[ "$bn" = "aes-armv4" ] && CVT_FLAGS="$CVT_FLAGS --global=!ad1,!ad2,!ad3"
+	[ "$bn" = "sha512-armv4" ] && CVT_FLAGS="$CVT_FLAGS --global=!HI,!LO"
+	perl $f | $O3/core/deps/minicrypto/arm-as-to-ios --stdin $CVT_FLAGS >$S
+    else
+	perl $f >$S	
+    fi
+    CMD="$GCC_AS_CMD $GLOBAL_COMPILE_FLAGS $COMPILE_FLAGS -DSYS_macosx -c $S"
+    echo $CMD
+    $CMD
+done
+
+CMD="$AR_CMD crs ../libminicrypto.a *.o"
+echo $CMD
+$CMD
+exit 0
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+
+set -e
+if [ -z "$O3" ]; then
+    echo O3 var must point to ovpn3 tree
+    exit 1
+fi
+if [ -z "$TARGET" ]; then
+    echo TARGET var must be defined
+    exit 1
+fi
+if [ -z "$ARCH" ]; then
+    echo "ARCH var must be defined (x86_64|i386)"
+    exit 1
+fi
+
+[ -z "$DL" ] && DL=~/Downloads
+
+. $O3/core/vars-$TARGET
+. $O3/core/deps/lib-versions
+
+DEST=minicrypto/minicrypto-$PLATFORM
+
+GLOBAL_COMPILE_FLAGS="$MIN_DEPLOY_TARGET $OTHER_COMPILER_FLAGS $LIB_OPT_LEVEL $LIB_FPIC"
+
+[ -z "$GCC_CMD" ] && GCC_CMD=gcc
+[ -z "$GCC_AS_CMD" ] && GCC_AS_CMD="$GCC_CMD"
+[ -z "$AR_CMD" ] && AR_CMD=ar
+
+# the directory where this script lives
+H=$O3/core/deps/minicrypto
+
+if [ "$NO_WIPE" != "1" ]; then
+    # unzip OpenSSL
+    rm -rf $OPENSSL_VERSION
+    tar xfz $DL/$OPENSSL_VERSION.tar.gz
+fi
+
+OPENSSL_DIR=$(pwd)/$OPENSSL_VERSION
+
+# make build directory
+mkdir -p minicrypto
+rm -rf minicrypto/minicrypto-$PLATFORM/$ARCH
+mkdir -p minicrypto/minicrypto-$PLATFORM/$ARCH/build.tmp
+cd minicrypto/minicrypto-$PLATFORM/$ARCH/build.tmp
+mkdir openssl
+
+# copy files from OpenSSL tree
+
+# AES (not necessary now that PolarSSL has AES optimizations)
+#cp $OPENSSL_DIR/crypto/aes/asm/aesni-x86_64.pl .
+
+if [ "$ARCH" = "x86_64" ]; then
+    # General
+    cp $O3/core/deps/polarssl/intel_cpu.c .
+    cp $OPENSSL_DIR/crypto/perlasm/x86_64-xlate.pl .
+    cp $OPENSSL_DIR/crypto/x86_64cpuid.pl .
+
+    # SHA general
+    cp $OPENSSL_DIR/crypto/md32_common.h .
+    cp $OPENSSL_DIR/crypto/sha/sha.h openssl
+
+    # SHA1
+    cp $OPENSSL_DIR/crypto/sha/sha_locl.h .
+    cp $OPENSSL_DIR/crypto/sha/sha1dgst.c .
+    cp $OPENSSL_DIR/crypto/sha/asm/sha1-x86_64.pl .
+
+    # SHA256
+    cp $OPENSSL_DIR/crypto/sha/sha256.c .
+
+    # SHA512
+    cp $OPENSSL_DIR/crypto/sha/sha512.c .
+    cp $OPENSSL_DIR/crypto/sha/asm/sha512-x86_64.pl .
+
+    # convert perl ASM to .s
+    for f in x86_64cpuid sha1-x86_64 ; do
+	perl $f.pl macosx >$f.s
+    done
+    perl sha512-x86_64.pl macosx sha512-x86_64.s
+    perl sha512-x86_64.pl macosx sha256-x86_64.s
+elif [ "$ARCH" = "i386" ]; then
+    # General
+    cp $O3/core/deps/polarssl/intel_cpu.c .
+    cp $OPENSSL_DIR/crypto/perlasm/x86asm.pl .
+    cp $OPENSSL_DIR/crypto/perlasm/x86gas.pl .
+    cp $OPENSSL_DIR/crypto/x86cpuid.pl .
+
+    # SHA general
+    cp $OPENSSL_DIR/crypto/md32_common.h .
+    cp $OPENSSL_DIR/crypto/sha/sha.h openssl
+
+    # SHA1
+    cp $OPENSSL_DIR/crypto/sha/sha_locl.h .
+    cp $OPENSSL_DIR/crypto/sha/sha1dgst.c .
+    cp $OPENSSL_DIR/crypto/sha/asm/sha1-586.pl .
+
+    # SHA256
+    cp $OPENSSL_DIR/crypto/sha/sha256.c .
+    cp $OPENSSL_DIR/crypto/sha/asm/sha256-586.pl .
+
+    # SHA512
+    cp $OPENSSL_DIR/crypto/sha/sha512.c .
+    cp $OPENSSL_DIR/crypto/sha/asm/sha512-586.pl .
+
+    # convert perl ASM to .s
+    for f in x86cpuid sha1-586 sha256-586 sha512-586 ; do
+	perl $f.pl macosx >$f.s
+    done
+fi
+
+cat >openssl/crypto.h <<EOF
+#define fips_md_init(alg) fips_md_init_ctx(alg, alg)
+#define fips_md_init_ctx(alg, cx) int alg##_Init(cx##_CTX *c)
+void OPENSSL_cleanse(void *ptr, unsigned long len);
+#define OPENSSL_VERSION_PTEXT " minicrypto"
+EOF
+
+# irrelevant headers
+touch openssl/e_os2.h
+touch openssl/opensslconf.h
+touch openssl/opensslv.h
+touch aes_locl.h
+touch cryptlib.h
+touch crypto.h
+
+# build C/ASM files
+for f in *.c *.s ; do
+    COMPILE_FLAGS="-arch $ARCH -DSHA1_ASM -DSHA256_ASM -DSHA512_ASM"
+    CMD="$GCC_CMD $GLOBAL_COMPILE_FLAGS $COMPILE_FLAGS -I. -c $f"
+    echo $CMD
+    $CMD
+done
+
+CMD="$AR_CMD crs ../libminicrypto.a *.o"
+echo $CMD
+$CMD
+echo SYMBOLS
+nm ../libminicrypto.a
+exit 0
@@ -0,0 +1,32 @@
+--- sha512-armv4.pl.orig	2012-09-03 13:21:35.000000000 -0600
+++ sha512-armv4.pl	2012-09-03 13:50:08.000000000 -0600
+@@ -220,9 +220,6 @@
+ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+ .size	K512,.-K512
+-.LOPENSSL_armcap:
+-.word	OPENSSL_armcap_P-sha512_block_data_order
+-.skip	32-4
+ 
+ .global	sha512_block_data_order
+ .type	sha512_block_data_order,%function
+@@ -230,10 +227,7 @@
+ 	sub	r3,pc,#8		@ sha512_block_data_order
+ 	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
+ #if __ARM_ARCH__>=7
+-	ldr	r12,.LOPENSSL_armcap
+-	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+-	tst	r12,#1
+-	bne	.LNEON
+        b	.LNEON                  @ JY -- assume ARM v7 always supports NEON
+ #endif
+ 	stmdb	sp!,{r4-r12,lr}
+ 	sub	$Ktbl,r3,#672		@ K512
+@@ -573,7 +567,6 @@
+ .size	sha512_block_data_order,.-sha512_block_data_order
+ .asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+ .align	2
+-.comm	OPENSSL_armcap_P,4,4
+ ___
+ 
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;