@@ -5,19 +5,16 @@ import {
55 NodeLike ,
66 DocumentFragmentLike ,
77} from './Doc.ts'
8- import { selfClosingTags } from './self-closing-tags.ts'
98
109export type NodeHandlerCallback = ( node : ElementLike | NodeLike ) => void
1110
// Module-level patterns used by the parser below.
//
// SELF_CLOSING_TAGS: anchored, case-insensitive test for tag names that are
// appended to the current parent and reported to the handler but never pushed
// on the open-element stack.
// NOTE(review): the tag-name group of HTML_PATTERN starts with a letter, so
// the alternative beginning with a bang looks unreachable - confirm.
//
// HTML_PATTERN: matches either a comment or a tag. Capture groups, in order:
// comment body, a leading slash or bang, the tag name, the raw attribute
// text, and an optional trailing slash. It carries the global flag, so its
// lastIndex is stateful; parse() resets it before scanning.
//
// ATTR_PATTERN: matches one attribute. Capture groups, in order: the name,
// a double-quoted value, a single-quoted value, and an unquoted value. The
// whole value part is optional, which is how bare attributes are matched.
11+ const SELF_CLOSING_TAGS =
12+ / ^ ( A R E A | M E T A | B A S E | B R | C O L | E M B E D | H R | I M G | I N P U T | L I N K | P A R A M | S O U R C E | T R A C K | W B R | C O M M A N D | K E Y G E N | M E N U I T E M | D O C T Y P E | ! D O C T Y P E ) $ / i
1213// Pre-compiled regexes for better performance
1314const HTML_PATTERN =
1415 / < ! - - ( [ ^ ] * ?(? = - - > ) ) - - > | < ( \/ | ! ) ? ( [ a - z ] [ a - z 0 - 9 - ] * ) \s * ( [ ^ > ] * ?) ( \/ ? ) > / gi
1516const ATTR_PATTERN =
1617 / ( [ a - z ] [ \w - . : ] * ) (?: \s * = \s * (?: " ( [ ^ " ] * ) " | ' ( [ ^ ' ] * ) ' | ( \S + ) ) ) ? / gi
17- const SVG_TEST = / s v g / i
18- const MATH_TEST = / m a t h / i
19- const HTML_TEST = / h t m l / i
20- const SCRIPT_TEST = / ^ S C R I P T $ / i
2118
2219// URI based on https://developer.mozilla.org/en-US/docs/Web/API/Document/createElementNS
2320const NSURI : Record < string , string > = {
@@ -26,8 +23,35 @@ const NSURI: Record<string, string> = {
2623 MATH : 'http://www.w3.org/1998/Math/MathML' ,
2724}
2825
29- // Cache self-closing tags regex
30- let selfClosingTagsRegex : RegExp
// Module-level caches keyed by tag name; they live for the lifetime of the module.
const tagRegexCache = new Map<string, RegExp>()
const namespaceCache = new Map<string, string>()

/**
 * Return a cached, case-insensitive regex that matches exactly `tagName`.
 *
 * The pattern is anchored at both ends so that a short name cannot match in
 * the middle of a longer one (an unanchored `a` would match `TABLE` or
 * `SPAN`, which would let a stray `</a>` close the wrong element).
 * Regex metacharacters in `tagName` are escaped so the name is always
 * compared literally.
 *
 * @param tagName - tag name to match; used verbatim as the cache key
 * @returns a non-global `RegExp`, safe to reuse with `.test()`
 */
const getTagRegex = (tagName: string): RegExp => {
  let regex = tagRegexCache.get(tagName)
  if (!regex) {
    // Escape anything the RegExp constructor would treat as syntax
    const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    regex = new RegExp(`^${escaped}$`, 'i')
    tagRegexCache.set(tagName, regex)
  }
  return regex
}
38+
/**
 * Resolve the namespace URI for an element that is about to be created.
 *
 * `svg`, any tag starting with `math`, and `html` always select their own
 * namespace. Every other tag inherits `parentNS`, falling back to the HTML
 * namespace when no parent namespace is given.
 *
 * Only the tag-determined results are memoised. The inherited result depends
 * on `parentNS`, so caching it under the tag name alone would make the first
 * context a tag was seen in (for example `a` or `title` inside HTML) win for
 * every later context (the same tag inside `<svg>`), and vice versa.
 *
 * @param tagName - tag name as written in the markup (any case)
 * @param parentNS - namespace URI of the parent element, if it has one
 * @returns the namespace URI to create the element in
 */
const getNamespace = (tagName: string, parentNS?: string): string => {
  const cached = namespaceCache.get(tagName)
  if (cached !== undefined) return cached

  const lower = tagName.toLowerCase()
  const ownNS: string | undefined =
    lower === 'svg'
      ? NSURI.SVG
      : lower.startsWith('math')
        ? NSURI.MATH
        : lower === 'html'
          ? NSURI.HTML
          : undefined

  // Context-dependent: inherit from the parent and never cache
  if (ownNS === undefined) return parentNS ?? NSURI.HTML

  namespaceCache.set(tagName, ownNS)
  return ownNS
}
3155
3256const setAttributes = ( node : Element | ElementLike , attributes : string ) => {
3357 const trimmed = attributes ?. trim ( )
@@ -38,11 +62,7 @@ const setAttributes = (node: Element | ElementLike, attributes: string) => {
3862
3963 while ( ( match = ATTR_PATTERN . exec ( trimmed ) ) ) {
4064 const name = match [ 1 ]
41- const value =
42- match [ 2 ] ??
43- match [ 3 ] ??
44- match [ 4 ] ??
45- ( match [ 0 ] . includes ( '=' ) ? '' : '' )
65+ const value = match [ 2 ] ?? match [ 3 ] ?? match [ 4 ] ?? ''
4666 node . setAttribute ( name , value )
4767 }
4868}
@@ -55,6 +75,18 @@ export const parse = <D extends Partial<DocumentLike | Document>>(
5575 markup : string ,
5676 handler : D | NodeHandlerCallback = Doc as D
5777) : ParseReturn < D > => {
78+ // Fast path for simple text-only content
79+ if ( ! markup . includes ( '<' ) ) {
80+ const doc = (
81+ ! handler || typeof handler === 'function' ? Doc : handler
82+ ) as DocumentLike
83+ const fragment = doc . createDocumentFragment ( )
84+ const textNode = doc . createTextNode ( markup )
85+ fragment . appendChild ( textNode )
86+ if ( typeof handler === 'function' ) handler ( textNode )
87+ return fragment as ParseReturn < D >
88+ }
89+
5890 HTML_PATTERN . lastIndex = 0
5991 let match : RegExpExecArray | null = null
6092 const doc = (
@@ -63,17 +95,14 @@ export const parse = <D extends Partial<DocumentLike | Document>>(
6395 const cb = ( typeof handler === 'function'
6496 ? handler
6597 : null ) as unknown as NodeHandlerCallback
66- const stack : Array < ElementLike | DocumentFragmentLike > = [
67- doc . createDocumentFragment ( ) ,
68- ]
98+
99+ // Pre-allocate stack with reasonable size
100+ const stack : Array < ElementLike | DocumentFragmentLike > = new Array ( 32 )
101+ stack [ 0 ] = doc . createDocumentFragment ( )
102+ let stackIndex = 0
69103 let lastIndex = 0
70104 const markupLength = markup . length
71105
72- // Cache self-closing tags regex
73- if ( ! selfClosingTagsRegex ) {
74- selfClosingTagsRegex = selfClosingTags ( )
75- }
76-
77106 while ( ( match = HTML_PATTERN . exec ( markup ) ) !== null ) {
78107 const [
79108 ,
@@ -89,11 +118,10 @@ export const parse = <D extends Partial<DocumentLike | Document>>(
89118 continue
90119 }
91120
92- const stackTop = stack . length - 1
93- const stackLastItem = stack [ stackTop ]
121+ const stackLastItem = stack [ stackIndex ]
94122
95- // pre lingering text
96- if ( match . index >= lastIndex + 1 ) {
123+ // Pre- lingering text
124+ if ( match . index > lastIndex ) {
97125 const text = markup . slice ( lastIndex , match . index )
98126 const node = doc . createTextNode ( text )
99127 stackLastItem ?. appendChild ( node )
@@ -112,46 +140,34 @@ export const parse = <D extends Partial<DocumentLike | Document>>(
112140 if ( tagName ) {
113141 if ( bangOrClosingSlash ) {
114142 const stackTagName = stackLastItem ?. tagName
115- if (
116- stackTagName &&
117- new RegExp ( tagName , 'i' ) . test ( stackTagName )
118- ) {
119- stack . pop ( )
143+ if ( stackTagName && getTagRegex ( tagName ) . test ( stackTagName ) ) {
144+ stackIndex --
120145 }
121146 continue
122147 }
123148
124- const ns = SVG_TEST . test ( tagName )
125- ? NSURI . SVG
126- : MATH_TEST . test ( tagName )
127- ? NSURI . MATH
128- : HTML_TEST . test ( tagName )
129- ? NSURI . HTML
130- : ( stackLastItem as ElementLike ) ?. namespaceURI ?? NSURI . HTML
149+ const ns = getNamespace (
150+ tagName ,
151+ ( stackLastItem as ElementLike ) ?. namespaceURI
152+ )
153+ const isSelfClosing =
154+ SELF_CLOSING_TAGS . test ( tagName . toLowerCase ( ) ) ||
155+ selfClosingSlash === '/'
131156
132- const selfClosingTag =
133- selfClosingTagsRegex . test ( tagName ) || selfClosingSlash === '/'
157+ const node = doc . createElementNS ( ns , tagName )
158+ setAttributes ( node , attributes )
159+ stackLastItem ?. appendChild ( node )
134160
135- if ( selfClosingTag ) {
136- const node = doc . createElementNS ( ns , tagName )
137- setAttributes ( node , attributes )
138- stackLastItem ?. appendChild ( node )
161+ if ( isSelfClosing ) {
139162 cb ?.( node )
140163 continue
141164 }
142165
143- const node = doc . createElementNS ( ns , tagName )
144- setAttributes ( node , attributes )
145- stackLastItem ?. appendChild ( node )
146-
147- // scripts in particular can have html strings that do not need to be rendered.
148- // The overall markup therefore we need a special lookup to find the closing tag
149- // without considering these possible HTML tag matches to be part of the final DOM
150- if ( SCRIPT_TEST . test ( tagName ) ) {
151- // try to find the closing tag
166+ // Handle script tags specially
167+ if ( tagName . toUpperCase ( ) === 'SCRIPT' ) {
152168 const possibleSimilarOnesNested : string [ ] = [ ]
153169 const exactTagPattern = new RegExp (
154- `<(\\/)?(${ tagName } )\\s*([^>]*? )>` ,
170+ `<(\\/)?(${ tagName } )\\s*([^>]*)>` ,
155171 'ig'
156172 )
157173 const markupAhead = markup . slice ( lastIndex )
@@ -160,10 +176,9 @@ export const parse = <D extends Partial<DocumentLike | Document>>(
160176 while (
161177 ( tagMatch = exactTagPattern . exec ( markupAhead ) ) !== null
162178 ) {
163- const [ , closingSlash , name , , selfClosingSlash ] = tagMatch
179+ const [ , closingSlash , name ] = tagMatch
164180
165- // check if the tag name is matched
166- if ( new RegExp ( tagName , 'i' ) . test ( name ) ) {
181+ if ( getTagRegex ( tagName ) . test ( name ) ) {
167182 if ( closingSlash ) {
168183 if ( ! possibleSimilarOnesNested . length ) {
169184 const textNode = doc . createTextNode (
@@ -172,20 +187,18 @@ export const parse = <D extends Partial<DocumentLike | Document>>(
172187 node . appendChild ( textNode )
173188 lastIndex =
174189 lastIndex + exactTagPattern . lastIndex
175- HTML_PATTERN . lastIndex = lastIndex // move the pattern needle to start matching later in the string
190+ HTML_PATTERN . lastIndex = lastIndex
176191 break
177192 } else {
178193 possibleSimilarOnesNested . pop ( )
179194 }
180- } else if ( ! selfClosingSlash ) {
181- // could be that there is a script HTML string inside
182- // we need to track those, so we don't mix them with the possible script closing tag
195+ } else {
183196 possibleSimilarOnesNested . push ( name )
184197 }
185198 }
186199 }
187200 } else {
188- stack . push ( node )
201+ stack [ ++ stackIndex ] = node
189202 }
190203
191204 cb ?.( node )
0 commit comments