Skip to content

Commit 03395ad

Browse files
committed
Ignore BOM mark -- take 2.
close #16.
1 parent bd94b33 commit 03395ad

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

JSON.awk

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,6 @@ BEGIN { #{{{1
5757
srand(); RS="n/o/m/a/t/c/h" rand()
5858
}
5959

60-
1 == NR && match($0, /^\xEF\xBB\xBF/) { # strip BOM mark {{{1
61-
$0 = substr($0, RLENGTH + 1)
62-
}
63-
6460
{ # main loop: process each file in turn {{{1
6561
reset() # See important application note in reset()
6662

@@ -331,17 +327,18 @@ function tokenize(a1, pq,pb,ESCAPE,CHAR,STRING,NUMBER,KEYWORD,SPACE) { #{{{1
331327

332328
# POSIX character classes (gawk) - contact me for non-[:class:] notation
333329
# Replaced regex constant for string constant, see https://github.com/step-/JSON.awk/issues/1
330+
# BOM="(^\xEF\xBB\xBF)"
334331
# ESCAPE="(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})"
335332
# CHAR="[^[:cntrl:]\\\"]"
336333
# STRING="\"" CHAR "*(" ESCAPE CHAR "*)*\""
337334
# NUMBER="-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?"
338335
# KEYWORD="null|false|true"
339336
SPACE="[[:space:]]+"
340-
341-
# gsub(STRING "|" NUMBER "|" KEYWORD "|" SPACE "|.", "\n&", a1)
342-
gsub(/\"[^[:cntrl:]\"\\]*((\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})[^[:cntrl:]\"\\]*)*\"|-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?|null|false|true|[[:space:]]+|./, "\n&", a1)
337+
# BOM "|" STRING "|" NUMBER "|" KEYWORD "|" SPACE "|."   (BOM already carries the ^ anchor)
338+
gsub(/(^\xEF\xBB\xBF)|\"[^[:cntrl:]\"\\]*((\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})[^[:cntrl:]\"\\]*)*\"|-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?|null|false|true|[[:space:]]+|./, "\n&", a1)
343339
gsub("\n" SPACE, "\n", a1)
344-
sub(/^\n/, "", a1)
340+
# strip the leading \n and, when present, the BOM token plus its trailing \n
341+
sub(/^\n(\xEF\xBB\xBF\n)?/, "", a1)
345342
ITOKENS=0 # get_token() helper
346343
return NTOKENS = split(a1, TOKENS, /\n/)
347344
}

0 commit comments

Comments
 (0)