@@ -269,54 +269,53 @@ char *nasm_quote_cstr(const char *str, size_t *lenp)
269269 * to indicate the lead marker of a quoted string. If it is '\"', then
270270 * '`' is not a special character at all.
271271 */
272+ enum unq_state { /* Unquoting parser states; file scope so nasm_skip_string can declare a variable of this type too */
273+ st_start , /* Ordinary text: a backslash enters st_backslash, NUL or the closing quote enters st_done, anything else is emitted */
274+ st_backslash , /* The character right after a backslash; selects which escape is being parsed */
275+ st_byte , /* Byte numeric sequence (\x, \d, \o or bare octal digits) */
276+ st_ucs , /* \u or \U; the accumulated value is emitted through EMIT_UTF8 */
277+ st_done /* End of string reached; terminates the parsing loop */
278+ };
272279
273280size_t nasm_unquote_anystr (char * str , char * * ep , const uint32_t badctl ,
274281 const char qstart )
275282{
276- unsigned char bq ;
277- const unsigned char * p ;
278- const unsigned char * escp = NULL ;
279- unsigned char * q ;
283+ const unsigned char bq = * str ;
284+ const unsigned char * p = (unsigned char * )str ;
285+ unsigned char * q = (unsigned char * )str ;
280286 unsigned char c ;
281287 uint32_t ctlmask = 0 ; /* Mask of control characters seen */
282- enum unq_state {
283- st_start ,
284- st_backslash ,
285- st_hex ,
286- st_oct ,
287- st_ucs ,
288- st_done
289- } state ;
290- int ndig = 0 ;
291- uint32_t nval = 0 ;
292-
293- p = q = (unsigned char * )str ;
294-
295- bq = * p ++ ;
296- if (!bq )
297- return 0 ;
298288
299289 if (bq == (unsigned char )qstart ) {
300- /* `...` string */
301- state = st_start ;
290+ /* `...` string or "..." with C unquoting */
291+ enum unq_state state = st_start ;
292+ unsigned int base = 0 ; /* Base of numeric escape sequence */
293+ uint64_t nval = 0 ; /* Accumulated value of numeric sequence */
294+ unsigned int v ;
295+ int ndig = 0 ; /* Max digits of numeric sequence */
296+ /* ndig < 0 means braced sequence */
297+ const unsigned char * escp = NULL ; /* Pointer to immediately after \ */
298+
299+ p ++ ; /* Skip initial quote */
302300
303301 while (state != st_done ) {
304302 c = * p ++ ;
305303 switch (state ) {
306304 case st_start :
307305 if (c == '\\' ) {
308306 state = st_backslash ;
309- } else if (( c == '\0' ) | ( c == bq ) ) {
307+ } else if (c == '\0' || c == bq ) {
310308 state = st_done ;
311309 } else {
312310 EMIT (c );
313311 }
314- break ;
312+ break ;
315313
316314 case st_backslash :
317315 state = st_start ;
318- escp = p ; /* Beginning of argument sequence */
316+ escp = p - 1 ;
319317 nval = 0 ;
318+
320319 switch (c ) {
321320 case 'a' :
322321 nval = 7 ;
@@ -341,20 +340,28 @@ size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
341340 break ;
342341 case 'u' :
343342 state = st_ucs ;
343+ base = 16 ;
344344 ndig = 4 ;
345- break ;
345+ goto check_brace ;
346346 case 'U' :
347347 state = st_ucs ;
348+ base = 16 ;
348349 ndig = 8 ;
349- break ;
350+ goto check_brace ;
350351 case 'v' :
351352 nval = 11 ;
352353 break ;
353354 case 'x' :
354355 case 'X' :
355- state = st_hex ;
356+ state = st_byte ;
357+ base = 16 ;
356358 ndig = 2 ;
357- break ;
359+ goto check_brace ;
360+ case 'd' : /* NASM extension: \d = decimal */
361+ state = st_byte ;
362+ base = 10 ;
363+ ndig = 3 ;
364+ goto check_brace ;
358365 case '0' :
359366 case '1' :
360367 case '2' :
@@ -363,10 +370,19 @@ size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
363370 case '5' :
364371 case '6' :
365372 case '7' :
366- state = st_oct ;
367- ndig = 2 ; /* Up to two more digits */
368- nval = c - '0' ;
369- break ;
373+ /* Back up both p and escp, as if there had been an "o" */
374+ p = escp -- ;
375+ /* fall through */
376+ case 'o' :
377+ state = st_byte ;
378+ ndig = 3 ;
379+ base = 8 ;
380+ check_brace : /* Is this the start of a braced sequence? */
381+ if (* p == '{' ) {
382+ p ++ ; /* Skip brace */
383+ ndig = -1 ;
384+ }
385+ break ;
370386 case '\0' :
371387 nval = '\\' ;
372388 p -- ; /* Reprocess; terminates string */
@@ -379,38 +395,37 @@ size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
379395 EMIT (nval );
380396 break ;
381397
382- case st_oct :
383- if (c >= '0' && c <= '7' ) {
384- nval = (nval << 3 ) + (c - '0' );
385- if (-- ndig )
386- break ; /* Might have more digits */
387- } else {
388- p -- ; /* Process this character again */
389- }
390- EMIT (nval );
391- state = st_start ;
392- break ;
393-
394- case st_hex :
398+ case st_byte :
395399 case st_ucs :
396- if (nasm_isxdigit ( c ) ) {
397- nval = (nval << 4 ) + numvalue ( c ) ;
400+ if (( v = numvalue_chk ( c )) < base ) {
401+ nval = (nval * base ) + v ;
398402 if (-- ndig )
399- break ; /* Might have more digits */
403+ break ; /* Continue processing number, no output */
404+ } else if (ndig < 0 ) {
405+ /* End of braced sequence */
406+ if (unlikely (c != '}' ))
407+ goto rewind ;
400408 } else {
401- p -- ; /* Process this character again */
409+ p -- ; /* Reprocess terminating character */
410+ if (unlikely (p == escp + 1 ))
411+ goto rewind ; /* No digits at all received */
402412 }
403413
404- if (unlikely (p <= escp ))
405- EMIT (escp [-1 ]);
406- else if (state == st_ucs )
414+ /* Emit the output */
415+ if (state == st_ucs )
407416 EMIT_UTF8 (nval );
408417 else
409418 EMIT (nval );
410419
411420 state = st_start ;
412421 break ;
413422
423+ /* Rewind an entire sequence as invalid */
424+ rewind :
425+ p = escp ; /* Start over at character following \ */
426+ state = st_start ;
427+ break ;
428+
414429 default :
415430 panic ();
416431 }
@@ -421,10 +436,11 @@ size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
421436 * * any kind, including collapsing double quote marks.)
422437 * We obviously can't get here if qstart == '\"'.
423438 */
424- while ((c = * p ++ ) && (c != bq ))
439+ p ++ ; /* Skip initial quote */
440+ while ((c = * p ++ ) && c != bq )
425441 EMIT (c );
426442 } else {
427- /* Not a quoted string, just return the input... */
443+ /* Not a quoted string, just return the input */
428444 while ((c = * p ++ ))
429445 EMIT (c );
430446 }
@@ -435,8 +451,11 @@ size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
435451 if (ctlmask & badctl )
436452 nasm_nonfatal ("control character in string not allowed here" );
437453
438- if (ep )
454+ if (ep ) {
455+ /* Point at the terminating character */
439456 * ep = (char * )p - 1 ;
457+ }
458+
440459 return (char * )q - str ;
441460}
442461#undef EMIT
@@ -471,11 +490,7 @@ char *nasm_skip_string(const char *str)
471490 char bq ;
472491 const char * p ;
473492 char c ;
474- enum unq_state {
475- st_start ,
476- st_backslash ,
477- st_done
478- } state ;
493+ enum unq_state state ;
479494
480495 bq = str [0 ];
481496 p = str + 1 ;
@@ -515,6 +530,9 @@ char *nasm_skip_string(const char *str)
515530 * a backquote will force a return to the st_start state,
516531 * and any possible multi-character state will terminate
517532 * for any non-alphanumeric character.
533+ *
534+ * The only reason this is needed at all is to detect
535+ * the \` sequence.
518536 */
519537 state = c ? st_start : st_done ;
520538 break ;
0 commit comments