AwkDc

This is an awk implementation of the dc arbitrary precision RPN calculator (can't let the sed weenies have all the fun, now can we?).

It is very much incomplete at the moment. Most notably missing is the arbitrary precision arithmetic, which is being developed in a separate library.

The Roadmap

The approximate roadmap involves:

  1. full dc opcode compatibility with optional support for the GNU extensions. awk native internal numbers will be used in this first version, with the LargestAccurateNumber being an implementation-dependent value (though most likely 2^52);
  2. integration of the arbitrary-precision arithmetic library;
  3. a bc-to-dc compiler written in pure awk;
  4. integration of fast-path cases where native awk numbers can be used instead of bignums for speed gains

There is no time line to go with this roadmap.

Notably, this version of dc is intended to include tail-call elimination for macro calls. Tail recursion drops out of this optimization as a special case because a macro cannot actually recurse: it can only call another macro by name, which may or may not be itself. There is no special way for a dc macro to recurse without naming itself, such as the Forth word 'RECURSE', the Scheme form described in SRFI-31 (entitled "A special form rec for recursive evaluation"), tacit programming in J, or point-free style in Haskell and other languages.

dc cannot be considered to be Turing-complete as currently implemented because it cannot loop infinitely: every version I know of will overflow the stack and die.

The Code

Here is version 0.1 of AwkDc:

The dc backbone

#!/usr/bin/awk -f 

# DEBUG FUNCTIONS {{{ -----------------

function showop(op) {
	#print "op: " op >> "/dev/stderr"
	return
}

function putstring(stk, str) {
	_dc_put(stk, "[" str "]")
}

# }}} ---------------------------------

# PRIMORDIAL FUNCTIONS {{{ ------------

function _dc_put(stk, num) {
	showop("put")
	stk[++stk[SP]] = num
}

function _dc_pop(stk) {
	showop("pop")
	return stk[stk[SP]--]
}

# }}} ---------------------------------

# TYPE VERIFICATION FUNCTIONS {{{ -----

function _dc_isnum(val) {
	return (val ~ /^-?[0-9]+(\.[0-9]+)?$/)
}

function _dc_isint(val) {
	return (val ~ /^-?[0-9]+$/)
}

function _dc_isstr(val) {
	return ((val ~ /^\[/) && (val ~ /\]$/))
}

function _dc_isarr(val) {
	return (val ~ /^ARRAY: /)
}

# }}} ---------------------------------

# PRIMITIVE OPERATOR FUNCTIONS {{{ ----

# -- printing commands

function _dc_print(stk) {
	showop("print")
	print stk[stk[SP]]
}

function _dc_prin(stk) {
	showop("prin")
	printf stk[stk[SP]]
}

function _dc_xprint(stk,	tos1, i) {
	showop("xprint")
	tos1 = _dc_pop(stk)
	if (_dc_isstr(tos1)) {
		printf tos1
	} else if (_dc_isnum(tos1)) {
		while (tos1 > 0) {
			printf "%c", (tos1 - (255 * int(tos1 / 255)))
			# done this way instead of with bitwise ops because
			# busybox awk doesn't support bitwise operations.
			# FIXME: this will break horribly when the number
			#  on the TOS is not an integer - catch this case
			#  and throw an error message. Also see how GNU dc
			#  handles the case.
			# FIXME: add a type-verification function to check
			#  for integer values? This facility is used in at
			#  least two places now.
			# FIXME: this entire process can be bypassed with
			#  the use of a lookup table and a byte-aligned
			#  bignum representation.
			# FIXME: check the ordering to make sure that I'm
			#  not processing things from the wrong end.
			tos1 /= 255
		}
	}
}

function _dc_show(stk,	i, buf) {
	showop("show")
	for (i = stk[SP]; i >= 0; i--) buf = (buf "\n" stk[i])
	sub(/^\n+/, "", buf)
	print buf
}

# -- arithmetic

function _dc_add(stk,	tos1, tos2) {
	showop("add")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	if (_dc_isnum(tos1) && _dc_isnum(tos2)) {
		_dc_put(stk, tos2 + tos1)
	} else {
		# FIXME: error message here
		# FIXME: extend '+' to operate on strings?
		# FIXME: the above comment is stupid, and
		#  the person who wrote it should be slapped.
	}
}

function _dc_sub(stk,	tos1, tos2) {
	showop("sub")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	if (_dc_isnum(tos1) && _dc_isnum(tos2)) {
		_dc_put(stk, tos2 - tos1)
	} else {
		# FIXME: error message here
	}
}

function _dc_mul(stk,	tos1, tos2) {
	showop("mul")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	if (_dc_isnum(tos1) && _dc_isnum(tos2)) {
		_dc_put(stk, tos2 * tos1)
	} else {
		# FIXME: error message here
		# FIXME: take into account variable overflow
	}
}

function _dc_div(stk,	tos1, tos2) {
	showop("div")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	if (_dc_isnum(tos1) && _dc_isnum(tos2)) {
		_dc_put(stk, tos2 / tos1)
	} else {
		# FIXME: error message here
		# FIXME: take into account variable precision
	}
}

function _dc_mod(stk,	tos1, tos2) {
	showop("mod")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	if (_dc_isnum(tos1) && _dc_isnum(tos2)) {
		_dc_put(stk, tos2 % tos1)
		# FIXME: these semantics don't seem to be
		#  quite right yet. Some more extensive
		#  test cases are definitely required.
		#  How to handle negative numbers?
	} else {
		# FIXME: error message here
	}
}

function _dc_dmod(stk,	tos1, tos2, quotient, residue) { # GNU extension!
	showop("dmod")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	if (_dc_isnum(tos1) && _dc_isnum(tos2)) {
		quotient = (tos2 / tos1)
		residue  = quotient - int(quotient)
		_dc_put(stk, quotient)
		_dc_put(stk, residue)
	} else {
		# FIXME: error message here
	}
}

function _dc_pow(stk,	tos1, tos2) {
	showop("pow")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	if (_dc_isnum(tos1) && _dc_isnum(tos2)) {
		_dc_put(stk, tos2 ^ tos1)
	} else {
		# FIXME: error message here
	}
}

function _dc_modx(stk,	modulus, exponent, base) { # GNU extension!
	showop("modx")
	modulus  = _dc_pop(stk)
	exponent = _dc_pop(stk)
	base     = _dc_pop(stk)
	# This is an incredibly stupid implementation which ignores
	# the problem of intermediate overflow. Totally, totally dumb.
	# It will be replaced with a version of the right-to-left
	# modexp algorithm soon, and it will disappear entirely when
	# the bignum code is merged in.
	if (_dc_isnum(base) && _dc_isnum(exponent) && _dc_isnum(modulus)) {
		# FIXME: add checks to ensure that exponent and modulus are
		#  integer values! (is that worth doing now?)
		_dc_put(stk, (base ^ exponent) % modulus)
	} else {
		# FIXME: add error checking here!
	}
}

function _dc_sqrt(stk,	tos1) {
	showop("sqrt")
	tos1 = _dc_pop(stk)
	if (_dc_isnum(tos1)) {
		_dc_put(stk, sqrt(tos1))
	} else {
		# FIXME: error message here
	}
}

# -- stack control

function _dc_clear(stk) {
	stk[SP] = -1
	stk[stk[SP]] = 0
	# FIXME: make error cases call this function,
	#  call this on startup,
	#  check whether the initial stack is 0 or empty
}

function _dc_dup(stk,	tos1) {
	showop("dup")
	tos1 = _dc_pop(stk)
	_dc_put(stk, tos1)
	_dc_put(stk, tos1)
}

function _dc_swap(stk,	tos1, tos2) { # GNU extension!
	showop("swap")
	tos1 = _dc_pop(stk)
	tos2 = _dc_pop(stk)
	_dc_put(stk, tos1)
	_dc_put(stk, tos2)
}

# -- registers

# s

# l

# S

# L

# -- parameters

# i

# o

# k

# I

# O

# K

# -- strings

# a

# x

# >r

# !>r

# <r

# !<r

# =r

# !=r

# ?

function _dc_quit(stk) {
	exit 0
	# FIXME: add macro-exiting logic here
}

# Q

# -- status inquiry

# Z

# X

# z

# -- miscellaneous

# !

# #

# :r

# ;r

# }}} ---------------------------------

# OPERATOR DISPATCH TABLE {{{ ---------

function _dc_dispatch(stk, op,	num, sig) {
	  # printing commands - complete
	       if (op == "p") { _dc_print(stk)
	} else if (op == "n") { _dc_prin(stk)
	} else if (op == "P") { _dc_xprint(stk)
	} else if (op == "f") { _dc_show(stk)
	  # arithmetic - complete
	} else if (op == "+") { _dc_add(stk)
	} else if (op == "-") { _dc_sub(stk)
	} else if (op == "*") { _dc_mul(stk)
	} else if (op == "/") { _dc_div(stk)
	} else if (op == "%") { _dc_mod(stk)
	} else if (op == "~") { _dc_dmod(stk)
	} else if (op == "^") { _dc_pow(stk)
	} else if (op == "|") { _dc_modx(stk)
	} else if (op == "v") { _dc_sqrt(stk)
	  # stack control - complete
	} else if (op == "c") { _dc_clear(stk)
	} else if (op == "d") { _dc_dup(stk)
	} else if (op == "r") { _dc_swap(stk)
	  # registers  ----- 4 ops missing
	  # parameters  ---- 6 ops missing
	  # strings  ------- 10 ops missing
	} else if (op == "q") { _dc_quit(stk)
	  # status inquiry - 3 ops missing
	  # miscellaneous  - 4 ops missing
	} else if (op ~ /^_?[0-9]+$/) {
		if (op ~ /^_/) {
			sig = -1
			num = (substr(op, 2) + 0)
		} else {
			sig = 1
			num = (op + 0)
		}
		_dc_put(stk, sig * num)
	} else {
		printf "Unhandled op '%s'!\n", op
	}
}

# }}} ---------------------------------


# stupid trivial test parser
# FIXME: replace this stupidity with a proper lexer
# FIXME: it won't be possible to handle any of the
#  string ops until this is replaced with a proper
#  parser, except perhaps by bypassing the operator
#  dispatch table and stuffing string values
#  directly onto the stack via putstring().
{for (i = 1; i <= NF; i++) _dc_dispatch(S, $i)}

...and here is a set of not-yet-properly-automated test cases:

The test suite

BEGIN {
	SP = "SP"
	#S[SP] = -1

	test1()
	test2()
	test3()
	test4()
	test5()
	test6()
	exit 0
}

####################################
# only stupid unit test code below # {{{
####################################

function test1(	S) { # test direct stack access; ops: put, show, add, mul, sub, div
	S[SP] = -1
	_dc_put(S, 10)
	_dc_put(S, 20)
	_dc_show(S)
	print "should be '20\\n10\n"

	_dc_add(S)
	_dc_show(S)
	print "should be 30\n"

	_dc_put(S, 3)
	_dc_mul(S)
	_dc_show(S)
	print "should be 90\n"

	_dc_put(S, 10)
	_dc_sub(S)
	_dc_show(S)
	print "should be 80\n"

	_dc_put(S, 4)
	_dc_div(S)
	_dc_show(S)
	print "should be 20\n"
}

function test2(	S) { # test op dispatch; ops: show/f, add/+, mul/*, sub/-, div//
	S[SP] = -1
	_dc_dispatch(S, "10")
	_dc_dispatch(S, "20")
	_dc_dispatch(S, "f")
	print "should be 10\\n20\n"

	_dc_dispatch(S, "+")
	_dc_dispatch(S, "f")
	print "should be 30\n"

	_dc_dispatch(S, "3")
	_dc_dispatch(S, "*")
	_dc_dispatch(S, "f")
	print "should be 90\n"

	_dc_dispatch(S, "10")
	_dc_dispatch(S, "-")
	_dc_dispatch(S, "f")
	print "should be 80\n"

	_dc_dispatch(S, "4")
	_dc_dispatch(S, "/")
	_dc_dispatch(S, "f")
	print "should be 20\n"
}

function test3(	S) { # test op dispatch, literal and computed negative numbers; ops: show/f, mul/*
	S[SP] = -1
	_dc_dispatch(S, "10")
	_dc_dispatch(S, "20")
	_dc_dispatch(S, "_1")
	_dc_dispatch(S, "f")
	print "should be -1\\n20\\n10\n"

	_dc_dispatch(S, "*")
	_dc_dispatch(S, "f")
	print "should be -20\\n10\n"

	_dc_dispatch(S, "*")
	_dc_dispatch(S, "f")
	print "should be -200\n"

	_dc_dispatch(S, "_1")
	_dc_dispatch(S, "f")
	print "should be -1\\n-200\n"

	_dc_dispatch(S, "*")
	_dc_dispatch(S, "f")
	print "should be 200\n"
}

function test4(	S) { # test op dispatch, stack clearing; ops: show/f, clear/c
	S[SP] = -1
	_dc_dispatch(S, "10")
	_dc_dispatch(S, "20")
	_dc_dispatch(S, "f")
	print "should be 20\\n10\n"

	_dc_dispatch(S, "c")
	print "should be nothing (clear stack)"

	_dc_dispatch(S, "10")
	_dc_dispatch(S, "20")
	_dc_dispatch(S, "f")
	print "should be 20\\n10\n"

	_dc_dispatch(S, "+")
	_dc_dispatch(S, "f")
	print "should be 30\n"
}

function test5(	S) { # test op dispatch, arithmetic and stack control; ops: sqrt/v, show/f, exp/^, dup/d, div//
	S[SP] = -1
	_dc_dispatch(S, "100")
	_dc_dispatch(S, "v")
	_dc_dispatch(S, "f")
	print "should be 10\n"

	_dc_dispatch(S, "3")
	_dc_dispatch(S, "^")
	_dc_dispatch(S, "f")
	print "should be 1000\n"

	_dc_dispatch(S, "d")
	_dc_dispatch(S, "f")
	print "should be 1000\\n1000\n"

	_dc_dispatch(S, "/")
	_dc_dispatch(S, "f")
	print "should be 1\n"
}

function test6(	S) { # test op dispatch, arithmetic and stack control; ops: show/f, dmod/~, modx/|, swap/r
	S[SP] = -1
	# FIXME: test dmod, modx, swap
	_dc_dispatch(S, "100")
	_dc_dispatch(S, "10")
	_dc_dispatch(S, "f")
	print "should be 10\\n100\n"

	_dc_dispatch(S, "~")
	_dc_dispatch(S, "f")
	print "should be 0\\n10\n"

	_dc_dispatch(S, "r")
	_dc_dispatch(S, "f")
	print "should be 10\\n0\n"

	_dc_dispatch(S, "+")
	_dc_dispatch(S, "f")
	print "should be 10\n"

	_dc_dispatch(S, "3")
	_dc_dispatch(S, "r")
	_dc_dispatch(S, "5")
	_dc_dispatch(S, "f")
	print "should be 5\\n10\\n3\n"

	_dc_dispatch(S, "|")
	_dc_dispatch(S, "f")
	print "should be 4\n"
}

# }}}
####################################

The bignum library

Yeah, um. This isn't done yet.