diff --git a/ometa.lua b/ometa.lua index b609705..c0fdc86 100644 --- a/ometa.lua +++ b/ometa.lua @@ -20,60 +20,59 @@ local Record = dataType { local depth = 0 local cache = {} -local OMeta +local OMeta = {} -OMeta = { - - use = function(grammar) +OMeta.use = function(grammar) local om = cache[grammar] if not om then om = OMeta.Input {grammar = grammar} cache[grammar] = om end return om - end; - - _do = function(oast, trans) +end + +OMeta._do = function(oast, trans) local last = require('ometa_ast2lua_ast_' .. (trans or 'reference')).toLuaAst(oast) local lsrc = require 'lua_ast2source'.trans:matchMixed(last) + --print("\n\n", lsrc, "\n\n") local gmod = loadstring(lsrc) return gmod() - end; - - doFile = function(path, trans) +end + +OMeta.doFile = function(path, trans) local oast = require 'ometa_lua_grammar'.OMetaInLuaGrammar.block:matchFile(path) return OMeta._do(oast, trans) - end; - - doString = function(str, trans) +end + +OMeta.doString = function(str, trans) local oast = require 'ometa_lua_grammar'.OMetaInLuaGrammar.block:matchString(str) return OMeta._do(oast, trans) - end; -} +end + OMeta.Input = class { - name = 'Input', + name = 'Input', super = {Any}; memoizeParameters = false; - + apply = function(self, ruleImpl) - + local entryState = self.stream local ruleType = type(ruleImpl) - + if ruleType == 'function' then local pass, result = ruleImpl(self) if not pass then self.stream = entryState end return pass, result end - + if ruleType == 'table' then local behavior = ruleImpl.behavior if not behavior then -- "plain" type as a rule - if ruleImpl:isInstance(self.stream._head) then + if ruleImpl:isInstance(self.stream._head) then return self:next() else return false, self.stream._head @@ -82,21 +81,21 @@ OMeta.Input = class { return self:_memoize(ruleImpl, entryState, entryState, entryState) end end - + -- primitive Lua value (string|number|boolean) return self:applyWithArgs(self.grammar.exactly, ruleImpl) end, - + applyWithArgs = function(self, ruleImpl, 
...) local entryState = self.stream - + if type(ruleImpl) == 'function' then local pass, result = ruleImpl(self, ...) if not pass then self.stream = entryState end return pass, result end - + local behavior = ruleImpl.behavior if not behavior then -- expected behavior not yet specified @@ -109,18 +108,18 @@ OMeta.Input = class { if fnarity < argsn then self.stream = self.stream:prepend(argsn - fnarity, select(fnarity + 1, ...)) end - + if fnarity ~= 0 and not self.memoizeParameters then local pass, result = behavior(self, ...) if not pass then self.stream = entryState end return pass, result end - return self:_memoize(ruleImpl, entryState, - self.stream, + return self:_memoize(ruleImpl, entryState, + self.stream, fnarity == 0 and self.stream or self.stream:prepend(fnarity, ...), ...) end, - + _memoize = function(self, ruleImpl, entryState, undoState, memoState, ...) ruleImpl.count = ruleImpl.count + 1 local record = memoState.memo[ruleImpl] @@ -131,9 +130,9 @@ OMeta.Input = class { } memoState.memo[ruleImpl] = record local pass, result = ruleImpl.behavior(self, ...) - if not pass then + if not pass then self.stream = entryState - return false, result + return false, result end record.pass = pass record.result = result @@ -144,9 +143,9 @@ OMeta.Input = class { while true do self.stream = undoState local pass, result = ruleImpl.behavior(self, ...) - if not pass - or self.stream == sentinelState - then break + if not pass + or self.stream == sentinelState + then break end record.pass = pass record.result = result @@ -182,7 +181,7 @@ OMeta.Input = class { self.stream = input.stream return pass, result end, - + next = function(self) local result = self.stream:head() if result == nil then return false end @@ -200,11 +199,11 @@ OMeta.Input = class { self.stream = tl return true, result end; - + property = function(self, index) self.stream = self.stream:property(index) end; - + match = function(self, ruleImpl, ...) local pass, result if not ... 
then @@ -224,17 +223,17 @@ OMeta.Input = class { end return result end; - + forString = function(self, str) self.stream = Streams.StringInputStream:new(str) return self end, - + forTable = function(self, tab) self.stream = Streams.TableInputStream:new(tab) return self end, - + forMixed = function(self, ...) local res, num, len = Array {}, 0, select('#', ...) for si = 1, len do @@ -267,7 +266,7 @@ OMeta.Input = class { self.stream = Streams.TableInputStream:new(res) return self end, - + forFile = function(self, path, binary) self.stream = (binary and Streams.BinaryInputStream or Streams.StringInputStream):new(utils.readFile(path)) return self @@ -275,10 +274,10 @@ OMeta.Input = class { } OMeta.Grammar = class { - + name = 'Grammar', - super = {Any}; - + super = {Any}; + constructor = function(class, init) for k, v in pairs(init) do if OMeta.Rule:isInstance(v) then @@ -287,7 +286,7 @@ OMeta.Grammar = class { end return init end; - + merge = function(self, source) for k, mrule in pairs(source) do if OMeta.Rule:isInstance(mrule) then @@ -314,10 +313,10 @@ OMeta.Grammar = class { } OMeta.Rule = class { - + name = 'Rule', super = {Any}; - + constructor = function(class, init) if init.arity == nil then init.arity = -1 end if init.count == nil then init.count = 0 end @@ -325,19 +324,19 @@ OMeta.Rule = class { if init.hits == nil then init.hits = 0 end return init end; - + matchString = function(self, str, ...) return OMeta.use(self.grammar):forString(str):match(self, ...) end; - + matchTable = function(self, tab, ...) return OMeta.use(self.grammar):forTable(tab):match(self, ...) end; - + matchMixed = function(self, ...) return OMeta.use(self.grammar):forMixed(...):match(self) end; - + matchFile = function(self, path, binary, ...) return OMeta.use(self.grammar):forFile(path, binary):match(self, ...) 
end; diff --git a/ometa/auxiliary.lpp b/ometa/auxiliary.lpp index 5ed1f3f..908192b 100644 --- a/ometa/auxiliary.lpp +++ b/ometa/auxiliary.lpp @@ -1,14 +1,25 @@ local tostring, tonumber, select, type, getmetatable, setmetatable, rawget = tostring, tonumber, select, type, getmetatable, setmetatable, rawget -local bit = require 'bit' -local band, bor, lshift = bit.band, bit.bor, bit.lshift +local band, bor, lshift +local lua_dot_version = _VERSION:match("5%.(%d)") +if lua_dot_version == "2" then + local bit32 = require("bit32") + band, bor, lshift = bit32.band, bit32.bor, bit32.lshift +elseif lua_dot_version > "2" then + loadstring = load + local bit = require("bit-compact") + band, bor, lshift = bit.band, bit.bor, bit.lshift +else + local bit = require('bit') + band, bor, lshift = bit.band, bit.bor, bit.lshift +end local Types = require 'types' local Any, Array = Types.Any, Types.Array local Auxiliary = { - + pattern = function(input, p, drop) local r = input.stream:pattern(p) if not r then return false end @@ -25,8 +36,8 @@ local Auxiliary = { ruleRef = type(fallback) == 'string' and input.grammar[fallback] or fallback end if ... then - return input:applyWithArgs(ruleRef, ...) + return input:applyWithArgs(ruleRef, ...) 
+ else return input:apply(ruleRef) end end; diff --git a/ometa/auxiliary.lua b/ometa/auxiliary.lua index 919c0d8..b52bf2c 100644 --- a/ometa/auxiliary.lua +++ b/ometa/auxiliary.lua @@ -1,6 +1,18 @@ local tostring, tonumber, select, type, getmetatable, setmetatable, rawget = tostring, tonumber, select, type, getmetatable, setmetatable, rawget -local bit = require('bit') -local band, bor, lshift = bit.band, bit.bor, bit.lshift +local band, bor, lshift +local lua_dot_version = _VERSION:match("5%.(%d)") +--print("_VERSION", _VERSION, lua_dot_version) +if lua_dot_version == "2" then + local bit32 = require("bit32") + band, bor, lshift = bit32.band, bit32.bor, bit32.lshift +elseif lua_dot_version > "2" then + loadstring = load + local bit = require("bit-compact") + band, bor, lshift = bit.band, bit.bor, bit.lshift +else + local bit = require('bit') + band, bor, lshift = bit.band, bit.bor, bit.lshift +end local Types = require('types') local Any, Array = Types.Any, Types.Array local Auxiliary = {pattern = function (input, p, drop) diff --git a/ometa/bit-compact.lua b/ometa/bit-compact.lua new file mode 100644 index 0000000..b8b4940 --- /dev/null +++ b/ometa/bit-compact.lua @@ -0,0 +1,99 @@ + +local bit = {} + +function bit.bnot (a) + return ~a & 0xFFFFFFFF +end + + +-- +-- in all vararg functions, avoid creating 'arg' table when there are +-- only 2 (or less) parameters, as 2 parameters is the common case +-- + +function bit.band (x, y, z, ...) + if not z then + return ((x or -1) & (y or -1)) & 0xFFFFFFFF + else + local arg = {...} + local res = x & y & z + for i = 1, #arg do res = res & arg[i] end + return res & 0xFFFFFFFF + end +end + +function bit.bor (x, y, z, ...) + if not z then + return ((x or 0) | (y or 0)) & 0xFFFFFFFF + else + local arg = {...} + local res = x | y | z + for i = 1, #arg do res = res | arg[i] end + return res & 0xFFFFFFFF + end +end + +function bit.bxor (x, y, z, ...) 
+ if not z then + return ((x or 0) ~ (y or 0)) & 0xFFFFFFFF + else + local arg = {...} + local res = x ~ y ~ z + for i = 1, #arg do res = res ~ arg[i] end + return res & 0xFFFFFFFF + end +end + +function bit.btest (...) + return bit.band(...) ~= 0 +end + +function bit.lshift (a, b) + return ((a & 0xFFFFFFFF) << b) & 0xFFFFFFFF +end + +function bit.rshift (a, b) + return ((a & 0xFFFFFFFF) >> b) & 0xFFFFFFFF +end + +function bit.arshift (a, b) + a = a & 0xFFFFFFFF + if b <= 0 or (a & 0x80000000) == 0 then + return (a >> b) & 0xFFFFFFFF + else + return ((a >> b) | ~(0xFFFFFFFF >> b)) & 0xFFFFFFFF + end +end + +function bit.lrotate (a ,b) + b = b & 31 + a = a & 0xFFFFFFFF + a = (a << b) | (a >> (32 - b)) + return a & 0xFFFFFFFF +end + +function bit.rrotate (a, b) + return bit.lrotate(a, -b) +end + +local function checkfield (f, w) + w = w or 1 + assert(f >= 0, "field cannot be negative") + assert(w > 0, "width must be positive") + assert(f + w <= 32, "trying to access non-existent bits") + return f, ~(-1 << w) +end + +function bit.extract (a, f, w) + local f, mask = checkfield(f, w) + return (a >> f) & mask +end + +function bit.replace (a, v, f, w) + local f, mask = checkfield(f, w) + v = v & mask + a = (a & ~(mask << f)) | (v << f) + return a & 0xFFFFFFFF +end + +return bit diff --git a/readme.md b/readme.md index aba0fc7..ad44dc5 100644 --- a/readme.md +++ b/readme.md @@ -37,7 +37,7 @@ Here it is an implementation of OMeta language in Lua. ## OMeta Citing [OMeta Homepage](http://www.tinlizzie.org/ometa/): -> OMeta is a new object-oriented language for pattern matching. It is based on a variant of Parsing Expression Grammars (PEGs) which we have extended to handle arbitrary data types. +> OMeta is a new object-oriented language for pattern matching. It is based on a variant of Parsing Expression Grammars (PEGs) which we have extended to handle arbitrary data types. 
OMeta's general-purpose pattern matching facilities provide a natural and convenient way for programmers to implement tokenizers, parsers, visitors, and tree transformers, all of which can be extended in interesting ways using familiar object-oriented mechanisms. Most of the features of the original [OMeta](http://www.tinlizzie.org/ometa/) and in particular [OMeta/JS](../../../../alexwarth/ometa-js) implementation also apply to OMeta/Lua. The [Ph.D. dissertation](http://www.vpri.org/pdf/tr2008003_experimenting.pdf) of Alessandro Warth, the author of OMeta, is the best source of information on the subject. @@ -49,7 +49,7 @@ If you need more information about parsing and about Parsing Expression Grammars *Why another PEG for Lua - there is great [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/)?* -This project is a part of greater effort - to create an object-oriented platform for Computer-Aided Software Engineering. +This project is part of a greater effort - to create an object-oriented platform for Computer-Aided Software Engineering. In brief: I need a very general parsing solution, modular, extensible, working on any type of input, etc. I know that most of these requirements are possible to fulfill with LPeg, but the workload would be similar and the level of control would be much worse. Moreover, I already know OMeta/JS and JavaScript, but my knowledge of C (needed for extending LPeg) is definitely insufficient. One more reason for porting OMeta was its solution for the [*left recursion* issue](http://www.vpri.org/pdf/tr2007002_packrat.pdf) and [memoization](https://en.wikipedia.org/wiki/Memoization) in general. @@ -97,7 +97,7 @@ ometa Grammar2 {...} ometa Grammar3 merges Grammar1, Grammar2 {...} ``` One thing you should be aware of is name conflict resolution - a Rule with a conflicting name will not be merged. - + ### Rule The Rule in OMeta/Lua is a kind of classifier. The Rule is a named element introduced within the Grammar or individually as a statement. 
```lua @@ -113,27 +113,29 @@ end The Rule body in OMeta (and in PEG in general) is an *ordered Choice* where every alternative is a *Sequence of Nodes*. Every Node has a dual result - a boolean indicator of success (pass or fail) and some value (it is similar to *protected call* interface in Lua). If the Node fails (first result is falsy) then whole alternative fails - a value (second result) does not matter. If all Nodes in the Sequence pass, the value (second result) of the last Node becomes the value of current alternative and the whole Choice (subsequent alternatives are not checked). ### Hello World Grammar -A basic information on defining Rules can be summarized by the "Hello World" example - an elemental algebraic operations parser. +A basic information on defining Rules can be summarized by the "Hello World" example - an elemental algebraic operations parser. ```lua +local OMeta = require('ometa') + local ometa Calc merges require 'grammar_commons' { exp = addexp, addexp = addexp '+' mulexp | addexp '-' mulexp | mulexp - , + , mulexp = mulexp '*' primexp | mulexp '/' primexp | primexp - , + , primexp = '(' exp ')' | numstr - , + , numstr = '-'? digit+ -} +} ``` Since this Grammar doesn't have any [semantic action](#semantic-actions), it does not do very much. It is able to simply consume input stream as far as it is matching Rules. -### Rule features +### Rule features Below, there is an overview of the basic means used to build a Rule. |Syntax|Notes| @@ -152,7 +154,7 @@ The PEG's *semantic actions* in OMeta/Lua are generalized to the **Host Nodes**. A Host Node is included in a Rule body using square brackets (`[]`). A specific kind of the Host Node and its impact on the result of a whole Rule are determined by its content. 
The current implementation provides three kinds of the Host Nodes: - - **Host Expression** and **Host Statement** are corresponding to PEG's *semantic actions*, + - **Host Expression** and **Host Statement** are corresponding to PEG's *semantic actions*, - **Host Predicate** is corresponding to *semantic predicate*. #### Host Expression @@ -235,21 +237,23 @@ The above Rule will not work as you might expect - the first occurrence of the * ### Host Nodes & binding - Hello World - continued It is the time for our Grammar update, so: ```lua +local OMeta = require('ometa') + local ometa TableTreeCalc merges require'grammar_commons' { exp = addexp, - addexp = l:addexp '+' r:mulexp [{'+', l, r}] - | l:addexp '-' r:mulexp [{'-', l, r}] + addexp = l:addexp '+' r:mulexp [{'+', l, r}] + | l:addexp '-' r:mulexp [{'-', l, r}] | mulexp - , - mulexp = l:mulexp '*' r:primexp [{'*', l, r}] - | l:mulexp '/' r:primexp [{'/', l, r}] + , + mulexp = l:mulexp '*' r:primexp [{'*', l, r}] + | l:mulexp '/' r:primexp [{'/', l, r}] | primexp - , + , primexp = '(' $^:exp ')' | numstr - , + , numstr = toNumber(<'-'? 
digit+>) -} +} return TableTreeCalc ``` This Grammar is now able to build a simple parse tree (Lua tables hierarchy) from the expressions provided as strings, eg.: @@ -382,51 +386,52 @@ Firstly, for convenience reason we need to rewrite our Grammar to use something ```lua local Types = require'types' local class, Any, Array = Types.class, Types.Any, Types.Array -local OMeta = require'ometa' +local OMeta = require'ometa' local BinOp = class {name = 'BinOp', super = {Any}} -- our new AST node type local ometa OpTreeCalc merges require'grammar_commons' { exp = addexp, - addexp = l:addexp "+" r:mulexp [BinOp {operator = 'add', left = l, right = r}] - | l:addexp "-" r:mulexp [BinOp {operator = 'sub', left = l, right = r}] + addexp = l:addexp "+" r:mulexp [BinOp {operator = 'add', left = l, right = r}] + | l:addexp "-" r:mulexp [BinOp {operator = 'sub', left = l, right = r}] | mulexp - , - mulexp = l:mulexp "*" r:primexp [BinOp {operator = 'mul', left = l, right = r}] - | l:mulexp "/" r:primexp [BinOp {operator = 'div', left = l, right = r}] + , + mulexp = l:mulexp "*" r:primexp [BinOp {operator = 'mul', left = l, right = r}] + | l:mulexp "/" r:primexp [BinOp {operator = 'div', left = l, right = r}] | primexp - , + , primexp = "(" $^:exp ")" | numstr - , + , numstr = ws* toNumber(<"-"? digit+>), - special = '+' | '-' | '*' | '/' - | '(' | ')' -} + special = '+' | '-' | '*' | '/' + | '(' | ')' +} ``` BTW the Grammar uses the Tokens now, so white-space management is improved. Next, let's write a new derived (by means of merge) Grammar for parsing mixed content: ```lua +local OMeta = require'ometa' local Aux = require 'auxiliary' local ometa MixedOTCalc merges OpTreeCalc { primexp = BinOp | OpTreeCalc.primexp -- "super" apply , - numstr = number + numstr = number | OpTreeCalc.numstr -- "super" apply , eval = opr:&BinOp Aux.apply([opr.operator], unknown) | number | any:. [? error('unexpected expression: ' .. tostring(any))] - , + , add = {; left:=eval, right:=eval} [! 
print('+', left, right)] [left + right], sub = {; left:=eval, right:=eval} [! print('-', left, right)] [left - right], mul = {; left:=eval, right:=eval} [! print('*', left, right)] [left * right], div = {; left:=eval, right:=eval} [! print('/', left, right)] [left / right], unknown = {; operator:=.} [? error('unexpected operator: ' .. operator)] -} +} MixedOTCalc.BinOp = BinOp return MixedOTCalc @@ -568,7 +573,7 @@ ___ ```lua static OMeta::use(grammar : Grammar) : OMeta ``` -It is a class (static) method. +It is a class (static) method. It accepts the Grammar package and returns an instance of OMeta used as parsing context, e.g.: ```lua local LuaGrammar = require'lua_grammar' @@ -599,7 +604,7 @@ print(luaAst) ___ ```lua static OMeta::doFile(path : string, translator : string [0..1]) : Grammar -``` +``` Class (static) method. Load, parse, translate, generate and evaluate Lua source for OMeta source file. For example: