parser.lua 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. -----------------------
  2. -- simple proto parser
  3. -----------------------
  4. local lpeg = require "lpeg"
  5. local P = lpeg.P
  6. local S = lpeg.S
  7. local R = lpeg.R
  8. local C = lpeg.C
  9. local Ct = lpeg.Ct
  10. local Cg = lpeg.Cg
  11. local Cc = lpeg.Cc
  12. local V = lpeg.V
  13. local next = next
  14. local error = error
  15. local tonumber = tonumber
  16. local pairs = pairs
  17. local ipairs = ipairs
  18. local rawset = rawset
  19. local tinsert = table.insert
  20. local smatch = string.match
  21. local sbyte = string.byte
  --- Scalar type keywords accepted in a field declaration, mapped to the
  -- constant stored in the field's `type`.
  -- A type name that is not a key of this table keeps its `type_name` and is
  -- resolved later against the collected message/enum names.
  local internal_type = {
    -- floating point
    double   = "TYPE_DOUBLE",
    float    = "TYPE_FLOAT",
    -- variable-length integers (`int` is accepted as another spelling of int32)
    int      = "TYPE_INT32",
    int32    = "TYPE_INT32",
    int64    = "TYPE_INT64",
    uint32   = "TYPE_UINT32",
    uint64   = "TYPE_UINT64",
    sint32   = "TYPE_SINT32",
    sint64   = "TYPE_SINT64",
    -- fixed-width integers
    fixed32  = "TYPE_FIXED32",
    fixed64  = "TYPE_FIXED64",
    sfixed32 = "TYPE_SFIXED32",
    sfixed64 = "TYPE_SFIXED64",
    -- everything else
    bool     = "TYPE_BOOL",
    string   = "TYPE_STRING",
    bytes    = "TYPE_BYTES",
  }
  --- Match-time callback that keeps the line counter in `parser_state`.
  -- The counter only advances when `pos` lies beyond the furthest position
  -- already recorded, so seeing the same position again does not count it twice.
  -- @param _ the subject string (unused)
  -- @param pos position handed to the callback by the matcher
  -- @param parser_state table with numeric `pos` and `line` fields
  -- @return pos, unchanged
  local function count_lines(_, pos, parser_state)
    local already_counted = pos <= parser_state.pos
    if not already_counted then
      parser_state.pos = pos
      parser_state.line = parser_state.line + 1
    end
    return pos
  end
  --- Pattern that never matches: reaching it raises a syntax error that names
  -- the file (empty when none was given) and the line counter kept in the
  -- parser state passed as the first extra argument of the match.
  local exception = lpeg.Cmt(lpeg.Carg(1), function(_, _, parser_state)
    local file = parser_state.file or ""
    -- error() does not return, so no position needs to be handed back.
    error("syntax error at [" .. file .. "] (" .. parser_state.line .. ")")
  end)
  -- Lexical building blocks ------------------------------------------------

  -- End of input: succeeds only when no character is left.
  local eof = -P(1)
  -- A line break; count_lines runs at match time with the parser state.
  local newline = lpeg.Cmt((P("\n") + P("\r\n")) * lpeg.Carg(1), count_lines)
  -- "//" up to and including the line break (or up to the end of input).
  local line_comment = P("//") * (P(1) - newline)^0 * (newline + eof)
  local blank = S(" \t") + newline + line_comment
  local blank0 = blank^0 -- optional whitespace / comments
  local blanks = blank^1 -- mandatory whitespace / comments

  local alpha = R("az", "AZ") + P("_")
  local alnum = alpha + R("09")
  -- identifier: a letter or underscore followed by letters, digits, underscores
  local ident = alpha * alnum^0

  -- String body character: anything but backslash/quote, or a backslash pair.
  local str_c = (P(1) - S('\\"')) + (P("\\") * P(1))
  -- Double-quoted string; captures the raw text between the quotes.
  local str = P('"') * C(str_c^0) * P('"')

  local dotname = (P(".") * ident)^0
  local typename = C(ident * dotname) -- identifier with optional .parts
  local name = C(ident)
  -- Quoted file name made of identifier characters, "/", "." and "-".
  local filename = P('"') * C((alnum + S("/.-"))^1) * P('"')
  -- Decimal number converted with tonumber, or the keyword "max" as -1.
  local id = (R("09")^1 / tonumber) + (P("max") * Cc(-1))
  local bool = (P("true") * Cc(true)) + (P("false") * Cc(false))
  local value = str + bool + name + id

  -- Top-level statement patterns, keyed by rule name.
  local patterns = {}

  -- `NAME = NUMBER ;` inside an enum body; yields the name and the number.
  local enum_item = Cg(name * blank0 * P("=") * blank0 * id * blank0 * P(";") * blank0)
  --- Fold step used while collecting enum items: appends one
  -- `{ name = k, number = v }` record to `tbl`.
  -- @param tbl list being built
  -- @param k item name
  -- @param v item number
  -- @return tbl
  local function insert(tbl, k, v)
    tbl[#tbl + 1] = { name = k, number = v }
    return tbl
  end
  -- enum NAME { ITEM = N; ... }
  -- Produces { type = "enum", name = NAME, value = { {name=, number=}, ... } }.
  do
    local header = Cg(P("enum"), "type") * blanks * Cg(typename, "name") * blank0
    local items = lpeg.Cf(Ct("") * enum_item^1, insert)
    patterns.ENUM = Ct(
      header * P("{") * blank0 * Cg(items, "value") * P("}") * blank0
    )
  end
  -- Field label keyword, captured as its LABEL_* constant.
  local prefix_field =
    (P("required") * Cc("LABEL_REQUIRED")) +
    (P("optional") * Cc("LABEL_OPTIONAL")) +
    (P("repeated") * Cc("LABEL_REPEATED"))

  -- One `key = value` entry of a bracketed option list.
  local postfix_pair = blank0 * Cg(name * blank0 * P("=") * blank0 * value * blank0)
  -- Any further entry, introduced by a comma.
  local postfix_pair_2 = blank0 * P(",") * postfix_pair
  -- The whole `[ k = v, ... ]` list.
  local postfix_field = P("[") * postfix_pair * postfix_pair_2^0 * blank0 * P("]")
  -- Optional option list folded into a table with rawset(tbl, key, value).
  local options = lpeg.Cf(Ct("") * postfix_field, rawset)^-1
  --- Fold step that attaches a parsed option table to a field table.
  -- An option table without any entry is not stored.
  -- @param t field table
  -- @param options table of option key/value pairs
  -- @return t
  local function setoption(t, options)
    local first_key = next(options)
    if not first_key then
      return t
    end
    t.options = options
    return t
  end
  -- LABEL TYPE NAME = NUMBER [options] ;
  -- Produces { label=, type_name=, name=, number= } and, when an option list
  -- is present, hands it to setoption.
  local message_field
  do
    local declaration = Ct(
      Cg(prefix_field, "label") * blanks *
      Cg(typename, "type_name") * blanks *
      Cg(name, "name") * blank0 * P("=") * blank0 *
      Cg(id, "number")
    )
    message_field =
      lpeg.Cf(declaration * blank0 * options, setoption) * blank0 * P(";") * blank0
  end
  -- extensions START to END ;
  -- Produces { type = "extensions", start = START, ["end"] = END }.
  local extensions
  do
    local keyword = Cg(P("extensions"), "type")
    local range = Cg(id, "start") * blanks * P("to") * blanks * Cg(id, "end")
    extensions = Ct(keyword * blanks * range * blank0 * P(";") * blank0)
  end
  -- extend NAME { FIELD ... }   (at least one field)
  -- Produces { type = "extend", name = NAME, extension = { field, ... } }.
  do
    local header = Cg(P("extend"), "type") * blanks * Cg(typename, "name") * blank0
    local fields = Cg(Ct(message_field^1), "extension")
    patterns.EXTEND = Ct(header * P("{") * blank0 * fields * P("}") * blank0)
  end
  -- message NAME { ... }
  -- The body may hold fields, enums, extension ranges, extend blocks and
  -- nested messages; V(1) refers back to this rule for the nesting.
  -- Produces { type = "message", name = NAME, items = { ... } }.
  do
    local member = message_field + patterns.ENUM + extensions + patterns.EXTEND + V(1)
    local header = Cg(P("message"), "type") * blanks * Cg(typename, "name") * blank0
    local body = P("{") * blank0 * Cg(Ct(member^0), "items") * P("}") * blank0
    patterns.MESSAGE = P({ [1] = Ct(header * body) })
  end
  -- Single-line top-level statements; each ends with `;`.
  do
    local terminator = blank0 * P(";") * blank0

    -- option NAME = VALUE ;   -> { type = "option", name =, value = }
    patterns.OPTION = Ct(
      Cg(P("option"), "type") * blanks *
      Cg(name, "name") * blank0 * P("=") * blank0 *
      Cg(value, "value")
    ) * terminator

    -- import "FILE" ;         -> { type = "import", name = FILE }
    patterns.IMPORT = Ct(
      Cg(P("import"), "type") * blanks * Cg(filename, "name")
    ) * terminator

    -- package NAME ;          -> { type = "package", name = NAME }
    patterns.PACKAGE = Ct(
      Cg(P("package"), "type") * blanks * Cg(typename, "name")
    ) * terminator
  end
  -- Grammar for a whole file: "PROTO" is the start rule and every entry of
  -- `patterns` becomes a rule of the same name. PROTO is a list of one or more
  -- statements, each matching any of those rules.
  local proto_tbl = { "PROTO" }
  do
    local any_statement
    for rule_name, rule in pairs(patterns) do
      proto_tbl[rule_name] = rule
      local reference = V(rule_name)
      -- first rule seeds the choice, later rules are added as alternatives
      any_statement = any_statement and (any_statement + reference) or reference
    end
    proto_tbl.PROTO = Ct(blank0 * any_statement^1)
  end
  local proto = P(proto_tbl)
  --- Handlers keyed by statement type. Each one receives the table being
  -- built (`self`) and the parsed statement (`v`) and stores the statement's
  -- data on `self`.
  local deal = {}

  -- Returns owner[key], first creating it as an empty list when it is unset.
  local function _list(owner, key)
    local list = owner[key]
    if not list then
      list = {}
      owner[key] = list
    end
    return list
  end

  --- import "file": appends the file name to `self.dependency`.
  function deal:import(v)
    local dependency = _list(self, "dependency")
    dependency[#dependency + 1] = v.name
  end

  --- package name: stored as `self.package`.
  function deal:package(v)
    self.package = v.name
  end

  --- enum: the statement table itself is appended to `self.enum_type`.
  function deal:enum(v)
    local enums = _list(self, "enum_type")
    enums[#enums + 1] = v
  end

  --- option name = value: stored in `self.options` under its name.
  function deal:option(v)
    _list(self, "options")[v.name] = v.value
  end

  --- extend: every field of the block is tagged with the extended type's
  -- name, gets its scalar type constant when the type is a built-in one
  -- (dropping `type_name` in that case), and is appended to `self.extension`.
  function deal:extend(v)
    local extension = _list(self, "extension")
    local extendee = v.name
    for _, field in ipairs(v.extension) do
      field.extendee = extendee
      local scalar = internal_type[field.type_name]
      field.type = scalar
      if scalar then
        field.type_name = nil
      end
      extension[#extension + 1] = field
    end
  end

  --- extensions a to b: the statement table is appended to
  -- `self.extension_range`.
  function deal:extensions(v)
    local ranges = _list(self, "extension_range")
    ranges[#ranges + 1] = v
  end
  --- Adds one item of a message body to the message under construction.
  -- An item that carries a `type` is a nested statement and is passed to the
  -- handler of that name in `deal` (after its `type` is cleared). An item
  -- without a `type` is a field: it gets its scalar type constant when the
  -- type is a built-in one and is appended to `self.field`.
  -- @param self message table being filled
  -- @param item parsed item from the message body
  local function _add_nested_message(self, item)
    local kind = item.type
    if kind ~= nil then
      local handler = deal[kind]
      item.type = nil
      handler(self, item)
      return
    end

    local scalar = internal_type[item.type_name]
    item.type = scalar
    if scalar then
      -- built-in type: no name is left to resolve
      item.type_name = nil
    end
    local fields = self.field
    if not fields then
      fields = {}
      self.field = fields
    end
    fields[#fields + 1] = item
  end
  --- message: creates a table holding only the message name, appends it to
  -- `self.nested_type`, then adds every parsed body item to it.
  function deal:message(v)
    local siblings = self.nested_type or {}
    self.nested_type = siblings
    local message = { name = v.name }
    siblings[#siblings + 1] = message
    for _, item in ipairs(v.items) do
      _add_nested_message(message, item)
    end
  end
  --- Turns the list of parsed top-level statements into one file table.
  -- Every statement is passed to the `deal` handler named by its `type`
  -- (the `type` is cleared first). Messages collected under `nested_type`
  -- are moved to `message_type`.
  -- @param r list of statement tables
  -- @return the assembled file table
  local function fix(r)
    local file = {}
    for _, statement in ipairs(r) do
      local handler = deal[statement.type]
      statement.type = nil
      handler(file, statement)
    end
    file.message_type, file.nested_type = file.nested_type, nil
    return file
  end
  197. --- fix message name
  -- Shared empty table, iterated in place of a missing list.
  local NULL = {}

  --- Resolves a type name relative to a namespace.
  -- A name that starts with "." is returned as it is. Otherwise the name is
  -- tried inside `namespace`, then inside each enclosing namespace (dropping
  -- the last ".part" each time), and the first candidate that is a key of
  -- `all` is returned.
  -- @param namespace dotted namespace to start from
  -- @param n type name to resolve
  -- @param all table whose keys include the known full names
  -- @return the full name, or nothing when no candidate is known
  local function _match_name(namespace, n, all)
    local DOT = 46 -- byte value of "."
    if sbyte(n) == DOT then
      return n
    end

    local scope = namespace
    while true do
      local candidate = scope .. "." .. n
      if all[candidate] then
        return candidate
      end
      scope = smatch(scope, "(.*)%.[%w_]+$")
      if scope == nil then
        return
      end
    end
  end
  --- Finalises one field: resolves a referenced type and normalises options.
  --
  -- Type: a field that still has a `type_name` refers to a message or enum.
  -- The name is resolved with _match_name, `type_name` becomes the full name
  -- and `type` becomes the value stored for it in `all`. An empty `type_name`
  -- is cleared; a field without `type_name` already has its scalar type.
  --
  -- Options: a `default` entry is moved to `field.default_value` as a string,
  -- and an option table left empty is removed. This now runs for every field,
  -- including scalar ones, and also for `default = false` (previously scalar
  -- fields returned early and a false default failed the truthiness test, so
  -- the default stayed inside `options`).
  --
  -- @param namespace full name of the enclosing scope
  -- @param field field table, modified in place
  -- @param all table of known full names
  -- Raises an error carrying the unresolved type name when a referenced type
  -- cannot be found.
  local function _fix_field(namespace, field, all)
    local type_name = field.type_name
    if type_name == "" then
      field.type_name = nil
    elseif type_name ~= nil then
      local full_name = assert(_match_name(namespace, type_name, all), type_name)
      field.type_name = full_name
      field.type = all[full_name]
    end

    local options = field.options
    if options then
      local default = options.default
      -- compare with nil: `false` is a legitimate default value
      if default ~= nil then
        field.default_value = tostring(default)
        options.default = nil
      end
      if next(options) == nil then
        field.options = nil
      end
    end
  end
  --- Resolves every extension field in `ext` (which may be missing):
  -- the `extendee` is replaced by its full name, then the field itself is
  -- finalised with _fix_field.
  -- @param namespace full name of the scope the extensions were declared in
  -- @param ext list of extension fields, or nil
  -- @param all table of known full names
  -- Raises an error carrying the extendee name when it cannot be resolved.
  local function _fix_extension(namespace, ext, all)
    if not ext then
      return
    end
    for _, ext_field in ipairs(ext) do
      local target = ext_field.extendee
      local resolved = _match_name(namespace, target, all)
      ext_field.extendee = assert(resolved, target)
      _fix_field(namespace, ext_field, all)
    end
  end
  --- Finalises a message: its fields first, then its nested messages
  -- (recursively), then its extension fields. The namespace used for name
  -- resolution is the message's own full name, looked up as `all[msg]`.
  -- @param msg message table
  -- @param all table of known full names, also keyed by message table
  -- Raises an error carrying the message name when the message has fields
  -- but no full name recorded in `all`.
  local function _fix_message(msg, all)
    local scope = all[msg]
    for _, field in ipairs(msg.field or NULL) do
      _fix_field(assert(scope, msg.name), field, all)
    end
    for _, nested in ipairs(msg.nested_type or NULL) do
      _fix_message(nested, all)
    end
    _fix_extension(scope, msg.extension, all)
  end
  --- Resolves all type references of one parsed file: every top-level
  -- message, then the file-level extension fields.
  --
  -- Full names are registered in `all` with a leading dot (".pkg.Name", or
  -- ".Name" when the file has no package), so file-level extensions are
  -- resolved in the namespace "." .. package, or "" without a package.
  -- Passing the bare package name made every relative lookup miss, and a file
  -- with top-level extensions but no package raised a nil-concatenation error.
  -- @param file file table (`message_type`, `extension`, `package`)
  -- @param all table of known full names
  local function _fix_typename(file, all)
    for _, message in ipairs(file.message_type or NULL) do
      _fix_message(message, all)
    end

    local package = file.package
    local namespace = package and ("." .. package) or ""
    _fix_extension(namespace, file.extension, all)
  end
  254. --- merge messages
  --- Registers an enum under its full name.
  -- A name starting with "." is taken as already full; any other name is
  -- appended to `prefix` with a dot. Afterwards `all[fullname]` is
  -- "TYPE_ENUM" and `all[enum]` is the full name.
  -- @param prefix full name of the enclosing scope
  -- @param enum enum table with a `name`
  -- @param all registry table, modified in place
  local function _enum_fullname(prefix, enum, all)
    local enum_name = enum.name
    local is_absolute = sbyte(enum_name) == 46 -- 46 is "."
    local fullname = is_absolute and enum_name or (prefix .. "." .. enum_name)
    all[enum] = fullname
    all[fullname] = "TYPE_ENUM"
  end
  --- Registers a message, its nested messages and its enums under their
  -- full names.
  -- A name starting with "." is taken as already full; any other name is
  -- appended to `prefix` with a dot. Afterwards `all[fullname]` is
  -- "TYPE_MESSAGE" and `all[msg]` is the full name, which is also the prefix
  -- for everything nested inside the message.
  -- @param prefix full name of the enclosing scope
  -- @param msg message table with a `name`
  -- @param all registry table, modified in place
  local function _message_fullname(prefix, msg, all)
    local msg_name = msg.name
    local is_absolute = sbyte(msg_name) == 46 -- 46 is "."
    local fullname = is_absolute and msg_name or (prefix .. "." .. msg_name)
    all[fullname] = "TYPE_MESSAGE"
    all[msg] = fullname

    for _, nested in ipairs(msg.nested_type or NULL) do
      _message_fullname(fullname, nested, all)
    end
    for _, enum in ipairs(msg.enum_type or NULL) do
      _enum_fullname(fullname, enum, all)
    end
  end
  --- Registers every top-level message and enum of a file in `all`.
  -- The prefix is "." followed by the package name, or the empty string when
  -- the file declares no package.
  -- @param file file table (`package`, `message_type`, `enum_type`)
  -- @param all registry table, modified in place
  local function _gen_fullname(file, all)
    local package = file.package
    local prefix = package and ("." .. package) or ""

    for _, message in ipairs(file.message_type or NULL) do
      _message_fullname(prefix, message, all)
    end
    for _, enum in ipairs(file.enum_type or NULL) do
      _enum_fullname(prefix, enum, all)
    end
  end
  293. --- parser
  -- Module table returned to the caller of require.
  local parser = {}

  --- Parses the text of one .proto file into a file table.
  -- The whole input must match the grammar; otherwise the `exception`
  -- pattern raises a syntax error that names `filename` and a line number.
  -- @param text source text
  -- @param filename name used in error messages (may be nil)
  -- @return the file table built by `fix`
  local function parser_one(text, filename)
    local state = { file = filename, pos = 0, line = 1 }
    local whole_input = proto * P(-1) + exception
    local statements = lpeg.match(whole_input, text, 1, state)
    return (fix(statements))
  end
  --- Parses a single .proto text and resolves its type names against the
  -- names declared in that same text.
  -- @param text source text
  -- @param filename name used in error messages (may be nil)
  -- @return the resolved file table
  function parser.parser(text, filename)
    local all = {}
    local file = parser_one(text, filename)
    _gen_fullname(file, all)
    _fix_typename(file, all)
    return file
  end
  308. local pb = require "protobuf"
  --- Reads, parses and registers a set of .proto files.
  -- All files are parsed first so that type names can be resolved across the
  -- whole set; the result is encoded as a
  -- "google.protobuf.FileDescriptorSet" and passed to `pb.register`.
  -- @param fileset a file name or a list of file names
  -- @param path optional directory prepended (with "/") to each file name
  -- @return the list of file tables, in the order given
  -- Raises an error when a file cannot be opened, fails to parse, or the
  -- descriptor set cannot be encoded.
  function parser.register(fileset, path)
    if type(fileset) == "string" then
      fileset = { fileset }
    end

    local all, files = {}, {}

    -- Pass 1: parse every file and collect the full names it declares.
    for _, filename in ipairs(fileset) do
      local fullname = filename
      if path then
        fullname = path .. "/" .. filename
      end
      local handle = assert(io.open(fullname, "r"))
      local buffer = handle:read("*a")
      handle:close()

      local file = parser_one(buffer, filename)
      _gen_fullname(file, all)
      file.name = filename
      files[#files + 1] = file
    end

    -- Pass 2: resolve type names now that every declaration is known.
    for _, file in ipairs(files) do
      _fix_typename(file, all)
    end

    local pbencode = pb.encode("google.protobuf.FileDescriptorSet", { file = files })
    if pbencode == nil then
      error(pb.lasterror())
    end
    pb.register(pbencode)
    return files
  end
  340. return parser