@@ -57,10 +57,6 @@ BEGIN { #{{{1
5757 srand(); RS="n/o/m/a/t/c/h" rand()
5858}
5959
60- 1 == NR && match($0, /^\xEF\xBB\xBF/) { # strip BOM mark {{{1
61- $0 = substr($0, RLENGTH + 1)
62- }
63-
6460{ # main loop: process each file in turn {{{1
6561 reset() # See important application note in reset()
6662
@@ -331,17 +327,18 @@ function tokenize(a1, pq,pb,ESCAPE,CHAR,STRING,NUMBER,KEYWORD,SPACE) { #{{{1
331327
332328 # POSIX character classes (gawk) - contact me for non-[:class:] notation
333329 # Replaced regex constant for string constant, see https://github.com/step-/JSON.awk/issues/1
330+ # BOM="(^\xEF\xBB\xBF)"
334331# ESCAPE="(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})"
335332# CHAR="[^[:cntrl:]\\\"]"
336333# STRING="\"" CHAR "*(" ESCAPE CHAR "*)*\""
337334# NUMBER="-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?"
338335# KEYWORD="null|false|true"
339336 SPACE="[[:space:]]+"
340-
341- # gsub(STRING "|" NUMBER "|" KEYWORD "|" SPACE "|.", "\n&", a1)
342- gsub(/\"[^[:cntrl:]\"\\]*((\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})[^[:cntrl:]\"\\]*)*\"|-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?|null|false|true|[[:space:]]+|./, "\n&", a1)
337+ # ^BOM "|" STRING "|" NUMBER "|" KEYWORD "|" SPACE "|."
338+ gsub(/(^\xEF\xBB\xBF)|\"[^[:cntrl:]\"\\]*((\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})[^[:cntrl:]\"\\]*)*\"|-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?|null|false|true|[[:space:]]+|./, "\n&", a1)
343339 gsub("\n" SPACE, "\n", a1)
344- sub(/^\n/, "", a1)
340+ # ^\n BOM?
341+ sub(/^\n(\xEF\xBB\xBF\n)?/, "", a1)
345342 ITOKENS=0 # get_token() helper
346343 return NTOKENS = split(a1, TOKENS, /\n/)
347344}
0 commit comments