mirror of
https://github.com/OpenVPN/openvpn3.git
synced 2024-09-20 12:12:15 +02:00
9373f515ee
build scripts to vars and deps.
760 lines
24 KiB
Python
Executable File
760 lines
24 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# arm-as-to-ios Modify ARM assembly code for the iOS assembler
|
|
#
|
|
# Copyright (c) 2012 Psellos http://psellos.com/
|
|
# Licensed under the MIT License:
|
|
# http://www.opensource.org/licenses/mit-license.php
|
|
#
|
|
# Resources for running OCaml on iOS: http://psellos.com/ocaml/
|
|
#
|
|
import sys
|
|
import re
|
|
|
|
# Version of this translator.
VERSION = '1.4.0'

# Symbol names that seed the global / defined symbol sets used by
# global_symbols().  read_input() appends to them when it sees a
# --global=... command-line argument.
initial_glosyms = []
initial_defsyms = []
|
|
|
|
# Character classes for expression lexing.
|
|
#
|
|
# First character of an identifier.
g_ccid0 = '[$.A-Z_a-z\x80-\xff]'

# Any later character of an identifier (digits are allowed too).
g_ccid = '[$.0-9A-Z_a-z\x80-\xff]'
|
|
def ccc(cc):
    # Complement a bracketed character class: '[abc]' becomes '[^abc]'
    # and '[^abc]' becomes '[abc]'.
    #
    opener, body = cc[0], cc[1:]
    if cc[1] == '^':
        # Already negated: drop the caret.
        return opener + body[1:]
    return opener + '^' + body
|
|
def ccce(cc):
    # Complement the class and also accept end of line.  The result is
    # a non-capturing group, so it can be appended to another pattern
    # without disturbing that pattern's group numbers.
    #
    return '(?:%s|$)' % ccc(cc)
|
|
|
|
# Prefixes for pooled symbol labels and jump table base labels. They're
|
|
# in the space of Linux assembler local symbols. Later rules will
|
|
# modify them to the Loc() form.
|
|
#
|
|
# Prefix of the labels in the address pool emitted by
# explicit_address_loads().
g_poolpfx = '.LP'

# Prefix of the jump table base labels created by jump_tables().
g_basepfx = '.LB'
|
|
|
|
|
|
def exists(p, l):
    # True if the predicate p holds for at least one element of l.
    # Stops at the first element that satisfies p.
    #
    return any(p(item) for item in l)
|
|
|
|
|
|
def forall(p, l):
    # True if the predicate p holds for every element of l (and so
    # also for an empty l).  Stops at the first element that fails p.
    #
    return all(p(item) for item in l)
|
|
|
|
|
|
def add_prefix(instrs):
    # Insert the compatibility prefix into the instruction list (in
    # place) and return the list.  The prefix holds macros for all
    # systems plus hardware definitions and macros for iOS.
    #
    # All systems:
    #
    #   Glo()      cpp macro for making global symbols (xxx vs _xxx)
    #   Loc()      cpp macro for making local symbols (.Lxxx vs Lxxx)
    #   .funtype   Expands to .thumb_func for iOS armv7 (null for armv6)
    #              Expands to .type %function for others
    #
    # iOS:
    #
    #   .machine   armv6/armv7
    #   .thumb     (for armv7)
    #   cbz        Expands to cmp/beq for armv6 (Thumb-only instr)
    #   .type      Not supported by Apple assembler
    #   .size      Not supported by Apple assembler
    #
    defre = '#[ \t]*if.*def.*SYS'   # Add new defs near first existing ones
    skipre = '$|\.syntax[ \t]'      # Skip comment lines (and .syntax)

    # Each instruction is a (space/comments, instruction, end) triple.
    def note(text):
        return (text, '', '\n')

    def cpp(text):
        return ('', text, '\n')

    def asm(text):
        return (' ', text, '\n')

    blank = ('', '', '\n')

    # Start at the first "#if...def...SYS" line, or at the top when
    # there is none.
    start = 0
    for idx, instr in enumerate(instrs):
        if re.match(defre, instr[1]):
            start = idx
            break

    # From there, move past comment-only lines and .syntax directives.
    # If every remaining line is skippable, the last one is used.
    where = start
    for where in range(start, len(instrs)):
        if not re.match(skipre, instrs[where][1]):
            break

    prefix = [
        blank,
        note('/* Apple compatibility macros */'),
        cpp('#if defined(SYS_macosx)'),
        cpp('#define Glo(s) _##s'),
        cpp('#define Loc(s) L##s'),
        cpp('#if defined(MODEL_armv6)'),
        asm('.machine armv6'),
        asm('.macro .funtype'),
        asm('.endm'),
        asm('.macro cbz'),
        asm('cmp $0, #0'),
        asm('beq $1'),
        asm('.endm'),
        cpp('#else'),
        asm('.machine armv7'),
        cpp('#if !defined(NO_THUMB)'),
        asm('.thumb'),
        cpp('#endif'),
        asm('.macro .funtype'),
        cpp('#if !defined(NO_THUMB)'),
        asm('.thumb_func $0'),
        cpp('#endif'),
        asm('.endm'),
        cpp('#endif'),
        asm('.macro .type'),
        asm('.endm'),
        asm('.macro .size'),
        asm('.endm'),
        asm('.macro .skip'),
        asm('.space $0'),
        asm('.endm'),
        asm('.macro .fpu'),
        asm('.endm'),
        asm('.macro .global'),
        asm('.globl $0'),
        asm('.endm'),
        cpp('#else'),
        cpp('#define Glo(s) s'),
        cpp('#define Loc(s) .L##s'),
        asm('.macro .funtype symbol'),
        asm('.type \\symbol, %function'),
        asm('.endm'),
        cpp('#endif'),
        note('/* End Apple compatibility macros */'),
        blank,
    ]
    instrs[where:where] = prefix
    return instrs
|
|
|
|
|
|
# Regular expression for modified ldr lines
|
|
#
|
|
# Groups: 1 = 'ldr rM, ' up to the '=', 2 = the symbol being loaded,
# 4 = the rest of the line.
g_ldre = '(ldr[ \t][^,]*,[ \t]*)=(([^ \t\n@,/]|/(?!\*))*)(.*)'


def explicit_address_loads(instrs):
    # Linux assemblers allow the following:
    #
    #     ldr rM, =symbol
    #
    # which loads rM with [mov] (immediately) if possible, or creates an
    # entry in memory for the symbol value and loads it PC-relatively
    # with [ldr].
    #
    # The Apple assembler doesn't seem to support this notation.  If the
    # value is a suitable constant, it emits a valid [mov].  Otherwise
    # it seems to emit an invalid [ldr] that always generates an error.
    # (At least I have not been able to make it work).  So, change uses
    # of =symbol to explicit PC-relative loads.
    #
    # This requires a pool containing the addresses to be loaded.  For
    # now, we just keep track of it ourselves and emit it into the text
    # segment at the end of the file.
    #
    # Takes and returns a list of (space/comments, instruction, end)
    # triples; the input list is not modified.
    #
    # Written with plain loops rather than reduce() over functions
    # with tuple parameters, so it runs on Python 2 and Python 3.
    #
    pool = {}       # symbol -> order of first use
    result = []

    def pool_label(sym):
        # Pool label for a symbol; a leading '.L' is dropped because
        # the pool prefix already puts the label in local-symbol space.
        if sym[0:2] == '.L':
            sym = sym[2:]
        return g_poolpfx + sym

    for (a, b, c) in instrs:
        (b1, b2, b3) = parse_iparts(b)
        mo = re.match(g_ldre, b3, re.DOTALL)
        if mo is None:
            result.append((a, b, c))
            continue
        sym = mo.group(2)
        if sym not in pool:
            pool[sym] = len(pool)
        newb3 = mo.group(1) + pool_label(sym) + mo.group(4)
        result.append((a, b1 + b2 + newb3, c))

    if pool:
        result.append(('', '', '\n'))
        result.append(('/* Pool of addresses loaded into registers */',
                       '', '\n'))
        result.append(('', '', '\n'))
        result.append((' ', '.text', '\n'))
        result.append((' ', '.align 2', '\n'))
        # Emit pool entries in order of first use.
        for sym in sorted(pool, key=pool.get):
            result.append(('', pool_label(sym) + ':', '\n'))
            result.append((' ', '.long ' + sym, '\n'))
    return result
|
|
|
|
|
|
def global_symbols(instrs):
    # The form of a global symbol differs between Linux assemblers and
    # the Apple assembler:
    #
    #     Linux:  xxx
    #     Apple:  _xxx
    #
    # Change occurrences of global symbols to use the Glo() cpp macro
    # defined in our prefix.
    #
    # We consider a symbol to be global if:
    #
    #     a.  It appears in a .globl declaration; or
    #     b.  It is referenced, has global form, and is not defined
    #
    # The sets are seeded from initial_glosyms / initial_defsyms.
    # Takes and returns a list of (space/comments, instruction, end)
    # triples; the input list is not modified.
    #
    # Written with plain loops rather than reduce() over functions
    # with tuple parameters, so it runs on Python 2 and Python 3.
    #
    glosyms = set(initial_glosyms)
    refsyms = set()
    defsyms = set(initial_defsyms)
    result = []

    id_end = ccce(g_ccid)       # Next char is not part of an identifier
    globl_re = re.compile(r'(\.globa?l)' + id_end)
    endm_re = re.compile(r'\.(endm|endmacro)' + id_end)
    macro_re = re.compile(r'\.macro' + id_end)
    noref_re = re.compile(r'\.(type|size|syntax|arch|fpu)' + id_end)

    # Identifiers that are never treated as global symbol references.
    nonglobal_res = [
        # numbered registers
        re.compile('(r|a|v|p|c|cr|f|s|d|q|mvax|wcgr)[0-9]+$', re.I),
        # named registers
        re.compile('(wr|sb|sl|fp|ip|sp|lr|pc)$', re.I),
        re.compile('(fpsid|fpscr|fpexc|mvfr1|mvfr0)$', re.I),
        re.compile('(mvf|mvd|mvfx|mvdx|dspsc)$', re.I),
        re.compile('(wcid|wcon|wcssf|wcasf|acc)$', re.I),
        # dot, local symbol, or number
        re.compile(r'\.$|\.L|[0-9]|#'),
        # shift names
        re.compile('(asl|lsl|lsr|asr|ror|rrx)$', re.I),
    ]

    def looksglobal(s):
        return not any(r.match(s) for r in nonglobal_res)

    def toglo(s):
        return 'Glo(' + s + ')'

    # Pass 1: symbols named in .globl/.global declarations.
    for (a, b, c) in instrs:
        if b.startswith('#'):
            continue                    # Preprocessor line
        b3 = parse_iparts(b)[2]
        mo = globl_re.match(b3)
        if mo is None:
            continue
        tokens = parse_expr(b3[len(mo.group(1)):])
        kinds = [token_type(t) for t in tokens]
        # Only accept a plain comma-separated list of identifiers.
        if all(k in ('space', 'id', ',') for k in kinds):
            glosyms.update(t for (t, k) in zip(tokens, kinds) if k == 'id')

    # Pass 2: referenced symbols of global form.  Track nesting of
    # .macro/.endm.  For now, we don't look for global syms in macro
    # defs.  (Avoiding scoping probs etc.)
    skipct = 0
    for (a, b, c) in instrs:
        if b.startswith('#'):
            continue                    # Preprocessor line
        if skipct > 0 and endm_re.match(b):
            skipct -= 1
            continue
        if macro_re.match(b):
            skipct += 1
            continue
        if skipct > 0:
            continue
        if noref_re.match(b):
            continue
        # list() so this works even if parse_rexpr returns an iterator.
        rtokens = list(parse_rexpr(parse_iparts(b)[2]))
        if len(rtokens) > 1 and rtokens[1] == '.req':
            # .req has atypical syntax; no symbol refs there anyway
            continue
        # The first token is the operation itself, not an operand.
        for t in rtokens[1:]:
            if token_type(t) == 'id' and looksglobal(t):
                refsyms.add(t)

    # Pass 3: defined symbols (labels and .req aliases).
    for (a, b, c) in instrs:
        if b.startswith('#'):
            continue                    # Preprocessor line
        (b1, b2, b3) = parse_iparts(b)
        rtokens = list(parse_rexpr(b3))
        if b1 != '':
            defsyms.add(b1)
        if len(rtokens) > 1 and rtokens[1] == '.req':
            defsyms.add(rtokens[0])

    glosyms |= (refsyms - defsyms)

    # Pass 4: wrap every occurrence of a global symbol in Glo().
    for (a, b, c) in instrs:
        if b.startswith('#'):
            result.append((a, b, c))    # Preprocessor line
            continue
        (b1, b2, b3) = parse_iparts(b)
        if b1 in glosyms:
            b1 = toglo(b1)
        tokens = [
            toglo(t) if token_type(t) == 'id' and t in glosyms else t
            for t in parse_expr(b3)
        ]
        result.append((a, b1 + b2 + ''.join(tokens), c))
    return result
|
|
|
|
|
|
def local_symbols(instrs):
    # The form of a local symbol differs between Linux assemblers and
    # the Apple assembler:
    #
    #     Linux:  .Lxxx
    #     Apple:  Lxxx
    #
    # Change occurrences of local symbols to use the Loc() cpp macro
    # defined in our prefix.  Only symbols that are defined as labels
    # somewhere in the instruction stream are changed.
    #
    # Takes and returns a list of (space/comments, instruction, end)
    # triples; the input list is not modified.
    #
    # Written with plain loops rather than reduce() over functions
    # with tuple parameters, so it runs on Python 2 and Python 3.
    #
    labeldef_re = re.compile(r'(\.L[^ \t:]*)[ \t]*:')
    labeluse_re = re.compile(r'\.L[^ \t@:,+*/\-()]+')

    # Collect the local labels defined at the start of an instruction.
    lsyms = set()
    for (a, b, c) in instrs:
        mo = labeldef_re.match(b)
        if mo:
            lsyms.add(mo.group(1))

    def to_loc(mo):
        # Rewrite a known local symbol; leave anything else untouched.
        sym = mo.group()
        if sym in lsyms:
            return 'Loc(' + sym[2:] + ')'
        return sym

    return [(a, labeluse_re.sub(to_loc, b), c) for (a, b, c) in instrs]
|
|
|
|
|
|
def funtypes(instrs):
    # Linux assemblers accept declarations like this:
    #
    #     .type symbol, %function
    #
    # For Thumb functions, the Apple assembler wants to see:
    #
    #     .thumb_func symbol
    #
    # Handle this by converting declarations to this:
    #
    #     .funtype symbol
    #
    # Our prefix defines an appropriate .funtype macro for each
    # environment.
    #
    # Takes and returns a list of (space/comments, instruction, end)
    # triples; the input list is not modified.  Anything following
    # "%function" in a converted instruction is dropped.
    #
    # The leading dot is escaped so that only a real ".type" directive
    # matches (an unescaped dot would accept any character there).
    type_re = re.compile(r'\.type[ \t]+([^ \t,]*),[ \t]*%function')

    result = []
    for (a, b, c) in instrs:
        mo = type_re.match(b)
        if mo:
            result.append((a, '.funtype ' + mo.group(1), c))
        else:
            result.append((a, b, c))
    return result
|
|
|
|
|
|
def jump_tables(instrs):
    # Jump tables for Linux assemblers often look like this:
    #
    #     tbh [pc, rM, lsl #1]
    #     .short (.Labc-.)/2+0
    #     .short (.Ldef-.)/2+1
    #     .short (.Lghi-.)/2+2
    #
    # The Apple assembler disagrees about the meaning of this code,
    # producing jump tables that don't work.  Convert to the following:
    #
    #     tbh [pc, rM, lsl #1]
    # .LBxxx:
    #     .short (.Labc-.LBxxx)/2
    #     .short (.Ldef-.LBxxx)/2
    #     .short (.Lghi-.LBxxx)/2
    #
    # In fact we just convert sequences of .short pseudo-ops of the
    # right form.  There's no requirement that they follow a tbh
    # instruction.
    #
    # Takes and returns a list of (space/comments, instruction, end)
    # triples; the input list is not modified.
    #
    # (A real parser would do a better job, but this was quick to
    # get working.)
    #
    sp = r'(?:[ \t]|/\*.*?\*/)*'                # space
    sp1 = r'(?:[ \t]|/\*.*?\*/)+'               # at least 1 space
    spe = r'(?:[ \t]|/\*.*?\*/|@[^\n]*)*$'      # end-of-instr space
    symbol = r'(?P<sym>[^ \t+\-*/@()]+)'        # symbol
    offset = r'(?P<k>(?:0[xX])?[0-9]+)'         # k
    dotdiff = (r'\(' + sp + symbol + sp + '-' + sp + r'\.' + sp +
               r'\)' + sp + '/' + sp + '2')     # (symbol-.)/2
    short_forms = [re.compile(form) for form in (
        r'\.short' + sp + dotdiff + spe,                            # (symbol-.)/2
        r'\.short' + sp + dotdiff + sp + r'\+' + sp + offset + spe, # (symbol-.)/2+k
        r'\.short' + sp1 + offset + sp + r'\+' + sp + dotdiff + spe,  # k+(symbol-.)/2
    )]
    # A .short followed by something other than space or a comment.
    # The leading dot is escaped so only a real ".short" qualifies.
    short_re = re.compile(r'\.short[ \t]+[^ \t@]')

    def offset_value(text):
        # Value of k.  A leading zero means octal, which is how
        # Python 2's int(text, 0) read it; Python 3's int(text, 0)
        # rejects that spelling.
        if len(text) > 1 and text[0] == '0' and text[1] not in 'xX':
            return int(text, 8)
        return int(text, 0)

    def short_match(seq, op):
        # Determine whether the op is a .short of the form that needs to
        # be converted: .short (symbol-.)/2+k.  If so, return a pair
        # containing the symbol and the value of k.  If not, return
        # None.  The short can only be converted if there were at least
        # k other .shorts in sequence before the current one.  A summary
        # of the previous .shorts is in seq.
        #
        for form in short_forms:
            mo = form.match(op)
            if mo is None:
                continue
            ktext = mo.groupdict().get('k')
            k = 0 if ktext is None else offset_value(ktext)
            if k > len(seq):
                return None
            return (mo.group('sym'), k)
        return None

    baselabs = []       # (result index, label) of base labels to insert
    result = []
    shortseq = []       # (result index, label) of the current .short run
    label = ''          # Local label seen on a preceding label-only line

    for (a, b, c) in instrs:
        (b1, b2, b3) = parse_iparts(b)

        if b3 == '':
            # No operation: just note label if present.
            result.append((a, b, c))
            if re.match(r'\.L.', b1):
                label = b1
            continue

        if not short_re.match(b3):
            # Not a .short: clear shortseq and label
            result.append((a, b, c))
            shortseq = []
            label = ''
            continue

        # We have a .short: figure out the label if any
        sl = b1 if re.match(r'\.L', b1) else label
        label = ''

        # Match against the run *before* this .short joins it.
        mpair = short_match(shortseq, b3)
        shortseq.append((len(result), sl))
        if not mpair:
            # A .short, but not of right form
            result.append((a, b, c))
            continue

        # OK, we have a .short to convert!
        (sym, k) = mpair

        # Figure out base label (create one if necessary).
        bx = len(shortseq) - 1 - k
        (bix, bl) = shortseq[bx]
        if bl == '':
            bl = g_basepfx + str(bix)
            shortseq[bx] = (bix, bl)
            baselabs.append(shortseq[bx])

        op = '.short\t(' + sym + '-' + bl + ')/2'
        result.append((a, b1 + b2 + op, c))

    # Add labels created here to the instruction stream.  Insert from
    # the highest index down so earlier insertions don't shift later
    # ones.  Sorting (rather than reversing creation order) matters
    # because a later .short can create a base label at an earlier
    # index than one already recorded.
    for (ix, lab) in sorted(baselabs, reverse=True):
        result[ix:ix] = [('', lab + ':', '\n')]

    # That does it
    return result
|
|
|
|
|
|
def dot_relative(instrs):
    # The Apple assembler (or possibly the linker) has trouble with code
    # that looks like this:
    #
    #     .word .Label - . + 0x80000000
    #     .word 0x1966
    # .Label:
    #     .word 0x1967
    #
    # One way to describe the problem is that the assembler marks the
    # first .word for relocation when in fact it's an assembly-time
    # constant.  Translate to the following form, which doesn't generate
    # a relocation marking:
    #
    # DR0 = .Label - . + 0x80000000
    #     .word DR0
    #     .word 0x1966
    # .Label:
    #     .word 0x1967
    #
    # Local symbols are recognized in their Loc() form ('locid'
    # tokens), so this pass has to run after local_symbols().
    #
    # Takes and returns a list of (space/comments, instruction, end)
    # triples; the input list is not modified.
    #
    # Written with plain loops rather than reduce() over functions
    # with tuple parameters, so it runs on Python 2 and Python 3.
    #
    prefix = 'DR'
    pseudo_re = re.compile(
        r'(\.byte|\.short|\.word|\.long|\.quad)' + ccce(g_ccid))
    result = []

    def tok_ok(t):
        return (t in ('.', '+', '-', '(', ')') or
                token_type(t) in ('space', 'locid', 'number'))

    def dotrel_match(expr):
        # Determine whether the expression is one that needs to be
        # translated: only simple tokens, with at least one local
        # symbol, one number, a '-' and a '.'.
        tokens = parse_expr(expr)
        if not all(tok_ok(t) for t in tokens):
            return False
        kinds = [token_type(t) for t in tokens]
        return ('locid' in kinds and 'number' in kinds and
                '-' in tokens and '.' in tokens)

    for (a, b, c) in instrs:
        if b.startswith('#'):
            result.append((a, b, c))    # Preprocessor line
            continue
        (b1, b2, b3) = parse_iparts(b)
        mo = pseudo_re.match(b3)
        if mo is None:
            result.append((a, b, c))
            continue
        p = mo.group(1)
        expr = b3[len(p):]
        if not dotrel_match(expr):
            result.append((a, b, c))
            continue
        # The index in the output stream makes the name unique.
        sym = prefix + str(len(result))
        result.append(('', sym + ' =' + expr, '\n'))
        result.append((a, b1 + b2 + p + ' ' + sym, c))
    return result
|
|
|
|
|
|
def read_input():
    # Concatenate all the input files into a string.
    #
    # With no command-line arguments, standard input is read.
    # Otherwise each argument is one of:
    #
    #     --global=foo,!bar   foo is forced to be global
    #                         bar is forced to be non-global
    #     --stdin...          read standard input at this point
    #     anything else       name of a file to read
    #
    # Every piece is given a final newline if it lacks one.  A file
    # that can't be read is reported on stderr and skipped.
    #
    def fnl(s):
        # Force a final newline.
        if s == '' or s[-1] == '\n':
            return s
        return s + '\n'

    if len(sys.argv) < 2:
        return fnl(sys.stdin.read())

    pieces = []
    for arg in sys.argv[1:]:
        if arg.startswith('--global='):
            for name in arg[len('--global='):].split(','):
                forced_off = name.startswith('!')
                if forced_off:
                    name = name[1:]
                if name == '':
                    # Skip empty names ("--global=", "a,,b", "!").  An
                    # empty global name would match the empty label of
                    # every unlabeled instruction.
                    continue
                if forced_off:
                    initial_defsyms.append(name)
                else:
                    initial_glosyms.append(name)
        elif arg.startswith('--stdin'):
            pieces.append(fnl(sys.stdin.read()))
        else:
            try:
                # "with" closes the file even if the read fails.
                with open(arg) as fd:
                    pieces.append(fnl(fd.read()))
            except (IOError, OSError):
                sys.stderr.write('arm-as-to-ios: cannot open ' + arg + '\n')
    return ''.join(pieces)
|
|
|
|
|
|
def parse_instrs(s):
    # Parse the string into assembly instructions, also noting C
    # preprocessor lines.  Each instruction is represented as a triple:
    # (space/comments, instruction, end).  The end is either ';' or
    # '\n'.
    #
    # A line starting with '#' that is not one of the recognized
    # preprocessor directives is kept as a comment: it becomes
    # (text, '', '\n').
    #
    # Instructions and preprocessor lines must be terminated (the
    # caller is expected to supply a final newline); otherwise an
    # internal parsing error is reported and the program exits.
    #
    def goodmo(mo):
        if mo is None:
            # Should never happen
            sys.stderr.write('arm-as-to-ios: internal parsing error\n')
            sys.exit(1)

    cpp_start = re.compile(
        '[ \t]*#[ \t]*(if|ifdef|elif|else|endif|define)')
    # Directive with backslash-continued lines, through the newline.
    cpp_re = re.compile('([ \t]*)(#([^\n]*\\\\\n)*[^\n]*[^\\\\\n])\n')
    comment_start = re.compile('[ \t]*#')
    # The newline is consumed along with the comment.  Leaving it
    # behind would make it parse as an extra empty instruction, adding
    # a blank line to the output after every '#' comment line.
    comment_re = re.compile('([ \t]*#[^\n]*)\n?')
    instr_re = re.compile(
        r'(([ \t]|/\*.*?\*/|@[^\n]*)*)'     # Spaces & comments
        r'(([ \t]|/\*.*?\*/|[^;\n])*)'      # "Instruction"
        r'([;\n])',                         # End
        re.DOTALL)

    instrs = []
    # Match at an advancing offset instead of re-slicing the string
    # after every instruction.
    pos = 0
    end = len(s)
    while pos < end:
        if cpp_start.match(s, pos):
            mo = cpp_re.match(s, pos)
            goodmo(mo)
            instrs.append((mo.group(1), mo.group(2), '\n'))
        elif comment_start.match(s, pos):
            mo = comment_re.match(s, pos)
            goodmo(mo)
            instrs.append((mo.group(1), '', '\n'))
        else:
            mo = instr_re.match(s, pos)
            goodmo(mo)
            instrs.append((mo.group(1), mo.group(3), mo.group(5)))
        pos = mo.end()
    return instrs
|
|
|
|
|
|
def parse_iparts(i):
    # Parse an instruction into smaller parts, returning a triple of
    # strings (label, colon, operation).  The colon part also contains
    # any surrounding spaces and comments (making the label and the
    # operation cleaner to process).  The three parts always
    # concatenate back to the original string.
    #
    # (Caller warrants that the given string doesn't start with space or
    # a comment.  This is true for strings returned by the instruction
    # parser.)
    #
    lab_re = (
        r'([^ \t:/@]+)'                         # Label
        # Only spaces and /* */ comments may come before the colon.
        # An '@' comment runs to the end of the line, so a colon
        # after one is comment text, not a label terminator.
        r'((?:[ \t]|/\*.*?\*/)*)'               # Spaces & comments
        ':'                                     # Colon
        r'((?:[ \t]|/\*.*?\*/|@[^\n]*)*)'       # Spaces & comments
        # Take everything that is left, including newlines inside a
        # trailing multi-line comment, so that no text is lost.
        r'([\s\S]*)'                            # Operation
    )

    if i.startswith('#'):
        # C preprocessor line; treat as operation.
        return ('', '', i)
    mo = re.match(lab_re, i)
    if mo:
        return (mo.group(1), mo.group(2) + ':' + mo.group(3), mo.group(4))
    # No label, just an operation
    return ('', '', i)
|
|
|
|
|
|
def parse_expr(s):
    # Parse a string into a sequence of tokens.  A segment of white
    # space (including comments) is treated as a token, so that the
    # tokens can be reassembled into the string again: joining the
    # returned list always gives back s.
    #
    # The patterns are tried in order at each position; the first one
    # that matches supplies the next token.
    #
    token_res = [
        # Spaces and comments.  A /* */ comment may span lines; an '@'
        # comment stops at the end of its line.
        re.compile(r'(?:[ \t]|/\*[\s\S]*?\*/|@[^\n]*)+'),
        # Glo(...) and Loc(...) are single tokens
        re.compile(r'(?:Glo|Loc)\([^()]*\)'),
        # String literal with backslash escapes
        re.compile(r'"(?:[^\\"]|\\.)*"'),
        # Identifier
        re.compile(g_ccid0 + g_ccid + '*'),
        # Numeric label reference (1b, 2f)
        re.compile(r'[0-9]+[bf]'),
        # Number
        re.compile(r'0[Xx][0-9a-fA-F]+|[0-9]+'),
        # Any other single character.  This includes newline, so the
        # loop below always finds a token.
        re.compile(r'[\s\S]'),
    ]

    result = []
    # Match at an advancing offset instead of re-slicing the string
    # after every token.
    pos = 0
    end = len(s)
    while pos < end:
        for token_re in token_res:
            mo = token_re.match(s, pos)
            if mo:
                break
        result.append(mo.group(0))
        pos = mo.end()
    return result
|
|
|
|
|
|
def parse_rexpr(s):
    # Like parse_expr(), but return only "real" tokens, not the
    # intervening space.
    #
    # The result is always a list: callers take its len() and index
    # it, which the iterator returned by Python 3's filter() doesn't
    # support.
    #
    return [t for t in parse_expr(s) if token_type(t) != 'space']
|
|
|
|
|
|
def token_type(t):
    # Classify a token.  Caller warrants that it was returned by
    # parse_expr() or parse_rexpr().
    #
    # The result is one of 'space', 'gloid', 'locid', 'string', 'id',
    # 'label' or 'number'; any other token is its own type.
    #
    # Checked in order; the first pattern matching the start of the
    # token decides.
    classifiers = (
        ('[ \t]|/\*|@', 'space'),
        ('Glo\(', 'gloid'),
        ('Loc\(', 'locid'),
        ('"', 'string'),
        (g_ccid0, 'id'),
        ('[0-9]+[bf]', 'label'),
        ('[0-9]', 'number'),
    )
    for (pattern, kind) in classifiers:
        if re.match(pattern, t):
            return kind
    return t                            # Sui generis
|
|
|
|
|
|
def debug_parse(a, b, c):
    # Show results of instruction stream parse: write the
    # space/comments part and the label, colon and operation parts of
    # the instruction to stdout, each wrapped in braces, followed by
    # the end part.
    #
    (label, colon, operation) = parse_iparts(b)
    parts = [a, label, colon, operation]
    sys.stdout.write(''.join('{' + part + '}' for part in parts) + c)
|
|
|
|
|
|
def main():
    # Read the input, apply the translation passes in order, and write
    # the resulting instruction stream to stdout.
    #
    # The order matters: explicit_address_loads() and jump_tables()
    # create labels in .L form that local_symbols() then rewrites to
    # Loc() form, and dot_relative() looks for the Loc() form.
    #
    passes = (
        explicit_address_loads,
        funtypes,
        jump_tables,
        global_symbols,
        local_symbols,
        dot_relative,
        add_prefix,
    )
    instrs = parse_instrs(read_input())
    for translate in passes:
        instrs = translate(instrs)
    sys.stdout.write(''.join(a + b + c for (a, b, c) in instrs))


# Run only when executed as a script, so that the module can be
# imported (for testing, say) without reading input.
if __name__ == '__main__':
    main()
|