MagickCore 6.9.13
Loading...
Searching...
No Matches
token.c
1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% Cristy %
17% January 1993 %
18% %
19% %
20% Copyright 1999 ImageMagick Studio LLC, a non-profit organization %
21% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% https://imagemagick.org/script/license.php %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
43#include "magick/studio.h"
44#include "magick/exception.h"
45#include "magick/exception-private.h"
46#include "magick/image.h"
47#include "magick/image-private.h"
48#include "magick/locale-private.h"
49#include "magick/memory_.h"
50#include "magick/string_.h"
51#include "magick/string-private.h"
52#include "magick/token.h"
53#include "magick/token-private.h"
54#include "magick/utility.h"
55
56/*
57 Typedef declarations.
58*/
60{
61 int
62 state;
63
64 MagickStatusType
65 flag;
66
67 ssize_t
68 offset;
69
70 char
71 quote;
72
73 size_t
74 signature;
75};
76
77/*
78%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
79% %
80% %
81% %
82% A c q u i r e T o k e n I n f o %
83% %
84% %
85% %
86%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87%
88% AcquireTokenInfo() allocates the TokenInfo structure.
89%
90% The format of the AcquireTokenInfo method is:
91%
92% TokenInfo *AcquireTokenInfo()
93%
94*/
95MagickExport TokenInfo *AcquireTokenInfo(void)
96{
98 *token_info;
99
100 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
101 if (token_info == (TokenInfo *) NULL)
102 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
103 token_info->signature=MagickCoreSignature;
104 return(token_info);
105}
106
107/*
108%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109% %
110% %
111% %
112% D e s t r o y T o k e n I n f o %
113% %
114% %
115% %
116%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117%
118% DestroyTokenInfo() deallocates memory associated with a TokenInfo
119% structure.
120%
121% The format of the DestroyTokenInfo method is:
122%
123% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
124%
125% A description of each parameter follows:
126%
127% o token_info: Specifies a pointer to a TokenInfo structure.
128%
129*/
130MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
131{
132 assert(token_info != (TokenInfo *) NULL);
133 assert(token_info->signature == MagickCoreSignature);
134 if (IsEventLogging() != MagickFalse)
135 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
136 token_info->signature=(~MagickCoreSignature);
137 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
138 return(token_info);
139}
140
141/*
142%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
143% %
144% %
145% %
146+ G e t N e x t T o k e n %
147% %
148% %
149% %
150%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151%
152% GetNextToken() gets a token from the token stream. A token is defined as
153% a sequence of characters delimited by whitespace (e.g. clip-path), a
154% sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
155% parenthesis (e.g. rgb(0,0,0)). GetNextToken() also recognizes these
156% separator characters: ':', '=', ',', and ';'. GetNextToken() returns the
157% length of the consumed token.
158%
159% The format of the GetNextToken method is:
160%
161% size_t GetNextToken(const char *magick_restrict start,
162% const char **magick_restrict end,const size_t extent,
163% char *magick_restrict token)
164%
165% A description of each parameter follows:
166%
167% o start: the start of the token sequence.
168%
169% o end: point to the end of the token sequence.
170%
171% o extent: maximum extent of the token.
172%
173% o token: copy the token to this buffer.
174%
175*/
176MagickExport magick_hot_spot size_t GetNextToken(
177 const char *magick_restrict start,const char **magick_restrict end,
178 const size_t extent,char *magick_restrict token)
179{
180 double
181 value;
182
183 char
184 *magick_restrict q;
185
186 const char
187 *magick_restrict p;
188
189 ssize_t
190 i;
191
192 assert(start != (const char *) NULL);
193 assert(token != (char *) NULL);
194 i=0;
195 p=start;
196 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
197 p++;
198 switch (*p)
199 {
200 case '\0':
201 break;
202 case '"':
203 case '\'':
204 case '`':
205 case '{':
206 {
207 char
208 escape;
209
210 switch (*p)
211 {
212 case '"': escape='"'; break;
213 case '\'': escape='\''; break;
214 case '`': escape='\''; break;
215 case '{': escape='}'; break;
216 default: escape=(*p); break;
217 }
218 for (p++; *p != '\0'; p++)
219 {
220 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
221 p++;
222 else
223 if (*p == escape)
224 {
225 p++;
226 break;
227 }
228 if (i < (ssize_t) (extent-1))
229 token[i++]=(*p);
230 if ((size_t) (p-start) >= (extent-1))
231 break;
232 }
233 break;
234 }
235 case '/':
236 {
237 if (i < (ssize_t) (extent-1))
238 token[i++]=(*p);
239 p++;
240 if ((*p == '>') || (*p == '/'))
241 {
242 if (i < (ssize_t) (extent-1))
243 token[i++]=(*p);
244 p++;
245 }
246 break;
247 }
248 default:
249 {
250 char
251 *q;
252
253 value=StringToDouble(p,&q);
254 (void) value;
255 if ((p != q) && (*p != ','))
256 {
257 for ( ; (p < q) && (*p != ','); p++)
258 {
259 if (i < (ssize_t) (extent-1))
260 token[i++]=(*p);
261 if ((size_t) (p-start) >= (extent-1))
262 break;
263 }
264 if (*p == '%')
265 {
266 if (i < (ssize_t) (extent-1))
267 token[i++]=(*p);
268 p++;
269 }
270 break;
271 }
272 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
273 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
274 {
275 if (i < (ssize_t) (extent-1))
276 token[i++]=(*p);
277 p++;
278 break;
279 }
280 for ( ; *p != '\0'; p++)
281 {
282 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
283 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
284 break;
285 if ((i > 0) && (*p == '<'))
286 break;
287 if (i < (ssize_t) (extent-1))
288 token[i++]=(*p);
289 if (*p == '>')
290 break;
291 if (*p == '(')
292 {
293 for (p++; *p != '\0'; p++)
294 {
295 if (i < (ssize_t) (extent-1))
296 token[i++]=(*p);
297 if ((*p == ')') && (*(p-1) != '\\'))
298 break;
299 if ((size_t) (p-start) >= (extent-1))
300 break;
301 }
302 if (*p == '\0')
303 break;
304 }
305 if ((size_t) (p-start) >= (extent-1))
306 break;
307 }
308 break;
309 }
310 }
311 token[i]='\0';
312 if (LocaleNCompare(token,"url(#",5) == 0)
313 {
314 q=strrchr(token,')');
315 if (q != (char *) NULL)
316 {
317 *q='\0';
318 (void) memmove(token,token+5,(size_t) (q-token-4));
319 }
320 }
321 while (isspace((int) ((unsigned char) *p)) != 0)
322 p++;
323 if (end != (const char **) NULL)
324 *end=(const char *) p;
325 return(p-start+1);
326}
327
328/*
329%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
330% %
331% %
332% %
333% G l o b E x p r e s s i o n %
334% %
335% %
336% %
337%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
338%
339% GlobExpression() returns MagickTrue if the expression matches the pattern.
340%
341% The format of the GlobExpression function is:
342%
343% MagickBooleanType GlobExpression(const char *magick_restrict expression,
344% const char *magick_restrict pattern,
345% const MagickBooleanType case_insensitive)
346%
347% A description of each parameter follows:
348%
349% o expression: Specifies a pointer to a text string containing a file name.
350%
351% o pattern: Specifies a pointer to a text string containing a pattern.
352%
353% o case_insensitive: set to MagickTrue to ignore the case when matching
354% an expression.
355%
356*/
357MagickExport MagickBooleanType GlobExpression(
358 const char *magick_restrict expression,const char *magick_restrict pattern,
359 const MagickBooleanType case_insensitive)
360{
361 char
362 path[MagickPathExtent];
363
364 MagickBooleanType
365 done,
366 match;
367
368 /*
369 Return on empty pattern or '*'.
370 */
371 if (pattern == (char *) NULL)
372 return(MagickTrue);
373 if (GetUTFCode(pattern) == 0)
374 return(MagickTrue);
375 if (LocaleCompare(pattern,"*") == 0)
376 return(MagickTrue);
377 GetPathComponent(pattern,SubimagePath,path);
378 if (*path != '\0')
379 return(MagickFalse);
380 /*
381 Evaluate glob expression.
382 */
383 done=MagickFalse;
384 while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
385 {
386 if (GetUTFCode(expression) == 0)
387 if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
388 break;
389 switch (GetUTFCode(pattern))
390 {
391 case '*':
392 {
393 MagickBooleanType
394 status;
395
396 status=MagickFalse;
397 while (GetUTFCode(pattern) == '*')
398 pattern+=GetUTFOctets(pattern);
399 while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
400 {
401 status=GlobExpression(expression,pattern,case_insensitive);
402 expression+=GetUTFOctets(expression);
403 }
404 if (status != MagickFalse)
405 {
406 while (GetUTFCode(expression) != 0)
407 expression+=GetUTFOctets(expression);
408 while (GetUTFCode(pattern) != 0)
409 pattern+=GetUTFOctets(pattern);
410 }
411 break;
412 }
413 case '[':
414 {
415 int
416 c;
417
418 pattern+=GetUTFOctets(pattern);
419 for ( ; ; )
420 {
421 if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
422 {
423 done=MagickTrue;
424 break;
425 }
426 if (GetUTFCode(pattern) == '\\')
427 {
428 pattern+=GetUTFOctets(pattern);
429 if (GetUTFCode(pattern) == 0)
430 {
431 done=MagickTrue;
432 break;
433 }
434 }
435 if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
436 {
437 c=GetUTFCode(pattern);
438 pattern+=GetUTFOctets(pattern);
439 pattern+=GetUTFOctets(pattern);
440 if (GetUTFCode(pattern) == ']')
441 {
442 done=MagickTrue;
443 break;
444 }
445 if (GetUTFCode(pattern) == '\\')
446 {
447 pattern+=GetUTFOctets(pattern);
448 if (GetUTFCode(pattern) == 0)
449 {
450 done=MagickTrue;
451 break;
452 }
453 }
454 if ((GetUTFCode(expression) < c) ||
455 (GetUTFCode(expression) > GetUTFCode(pattern)))
456 {
457 pattern+=GetUTFOctets(pattern);
458 continue;
459 }
460 }
461 else
462 if (GetUTFCode(pattern) != GetUTFCode(expression))
463 {
464 pattern+=GetUTFOctets(pattern);
465 continue;
466 }
467 pattern+=GetUTFOctets(pattern);
468 while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
469 {
470 if ((GetUTFCode(pattern) == '\\') &&
471 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
472 pattern+=GetUTFOctets(pattern);
473 pattern+=GetUTFOctets(pattern);
474 }
475 if (GetUTFCode(pattern) != 0)
476 {
477 pattern+=GetUTFOctets(pattern);
478 expression+=GetUTFOctets(expression);
479 }
480 break;
481 }
482 break;
483 }
484 case '?':
485 {
486 pattern+=GetUTFOctets(pattern);
487 expression+=GetUTFOctets(expression);
488 break;
489 }
490 case '{':
491 {
492 char
493 *target;
494
495 char
496 *p;
497
498 target=AcquireString(pattern);
499 p=target;
500 pattern++;
501 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
502 {
503 *p++=(*pattern++);
504 if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
505 {
506 *p='\0';
507 match=GlobExpression(expression,target,case_insensitive);
508 if (match != MagickFalse)
509 {
510 expression+=MagickMin(strlen(expression),strlen(target));
511 break;
512 }
513 p=target;
514 pattern+=GetUTFOctets(pattern);
515 }
516 }
517 while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
518 pattern+=GetUTFOctets(pattern);
519 if (GetUTFCode(pattern) != 0)
520 pattern+=GetUTFOctets(pattern);
521 target=DestroyString(target);
522 break;
523 }
524 case '\\':
525 {
526 pattern+=GetUTFOctets(pattern);
527 if (GetUTFCode(pattern) == 0)
528 break;
529 magick_fallthrough;
530 }
531 default:
532 {
533 if (case_insensitive != MagickFalse)
534 {
535 if (LocaleToLowercase((int) GetUTFCode(expression)) != LocaleToLowercase((int) GetUTFCode(pattern)))
536 {
537 done=MagickTrue;
538 break;
539 }
540 }
541 else
542 if (GetUTFCode(expression) != GetUTFCode(pattern))
543 {
544 done=MagickTrue;
545 break;
546 }
547 expression+=GetUTFOctets(expression);
548 pattern+=GetUTFOctets(pattern);
549 }
550 }
551 }
552 while (GetUTFCode(pattern) == '*')
553 pattern+=GetUTFOctets(pattern);
554 match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
555 MagickTrue : MagickFalse;
556 return(match);
557}
558
559/*
560%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
561% %
562% %
563% %
564+ I s G l o b %
565% %
566% %
567% %
568%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
569%
570% IsGlob() returns MagickTrue if the path specification contains a globbing
571% pattern.
572%
573% The format of the IsGlob method is:
574%
575% MagickBooleanType IsGlob(const char *path)
576%
577% A description of each parameter follows:
578%
579% o path: the path.
580%
581*/
582MagickExport MagickBooleanType IsGlob(const char *path)
583{
584 MagickBooleanType
585 status = MagickFalse;
586
587 const char
588 *p;
589
590 if (IsPathAccessible(path) != MagickFalse)
591 return(MagickFalse);
592 for (p=path; *p != '\0'; p++)
593 {
594 switch (*p)
595 {
596 case '*':
597 case '?':
598 case '{':
599 case '}':
600 case '[':
601 case ']':
602 {
603 status=MagickTrue;
604 break;
605 }
606 default:
607 break;
608 }
609 }
610 return(status);
611}
612
613/*
614%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
615% %
616% %
617% %
618% I s M a g i c k T r u e %
619% %
620% %
621% %
622%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
623%
624% IsMagickTrue() returns MagickTrue if the value is "true", "on", "yes" or
625% "1".
626%
627% The format of the IsMagickTrue method is:
628%
629% MagickBooleanType IsMagickTrue(const char *value)
630%
631% A description of each parameter follows:
632%
633% o returns: either MagickTrue or MagickFalse depending on the value
634% parameter.
635%
636% o value: Specifies a pointer to a character array.
637%
638*/
639MagickExport MagickBooleanType IsMagickTrue(const char *value)
640{
641 if (value == (const char *) NULL)
642 return(MagickFalse);
643 if (LocaleCompare(value,"true") == 0)
644 return(MagickTrue);
645 if (LocaleCompare(value,"on") == 0)
646 return(MagickTrue);
647 if (LocaleCompare(value,"yes") == 0)
648 return(MagickTrue);
649 if (LocaleCompare(value,"1") == 0)
650 return(MagickTrue);
651 return(MagickFalse);
652}
653
654/*
655%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
656% %
657% %
658% %
659% T o k e n i z e r %
660% %
661% %
662% %
663%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
664%
665% Tokenizer() is a generalized, finite state token parser. It extracts tokens
666% one at a time from a string of characters. The characters used for white
667% space, for break characters, and for quotes can be specified. Also,
668% characters in the string can be preceded by a specifiable escape character
669% which removes any special meaning the character may have.
670%
671% Here is some terminology:
672%
673% o token: A single unit of information in the form of a group of
674% characters.
675%
676% o white space: Space that gets ignored (except within quotes or when
677% escaped), like blanks and tabs. in addition, white space terminates a
678% non-quoted token.
679%
680% o break set: One or more characters that separates non-quoted tokens.
681% Commas are a common break character. The usage of break characters to
682% signal the end of a token is the same as that of white space, except
683% multiple break characters with nothing or only white space between
684% generate a null token for each two break characters together.
685%
686% For example, if blank is set to be the white space and comma is set to
687% be the break character, the line
688%
689% A, B, C , , DEF
690%
691% ... consists of 5 tokens:
692%
693% 1) "A"
694% 2) "B"
695% 3) "C"
696% 4) "" (the null string)
697% 5) "DEF"
698%
699% o Quote character: A character that, when surrounding a group of other
700% characters, causes the group of characters to be treated as a single
701% token, no matter how many white spaces or break characters exist in
702% the group. Also, a token always terminates after the closing quote.
703% For example, if ' is the quote character, blank is white space, and
704% comma is the break character, the following string
705%
706% A, ' B, CD'EF GHI
707%
708% ... consists of 4 tokens:
709%
710% 1) "A"
711% 2) " B, CD" (note the blanks & comma)
712% 3) "EF"
713% 4) "GHI"
714%
715% The quote characters themselves do not appear in the resultant
716% tokens. The double quotes are delimiters i use here for
717% documentation purposes only.
718%
719% o Escape character: A character which itself is ignored but which
720% causes the next character to be used as is. ^ and \ are often used
721% as escape characters. An escape in the last position of the string
722% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
723% and non-escape) character. For example, assume white space, break
724% character, and quote are the same as in the above examples, and
725% further, assume that ^ is the escape character. Then, in the string
726%
727% ABC, ' DEF ^' GH' I ^ J K^ L ^
728%
729% ... there are 7 tokens:
730%
731% 1) "ABC"
732% 2) " DEF ' GH"
733% 3) "I"
734% 4) " " (a lone blank)
735% 5) "J"
736% 6) "K L"
737% 7) "^" (passed as is at end of line)
738%
739% The format of the Tokenizer method is:
740%
741% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
742% const size_t max_token_length,const char *line,const char *white,
743% const char *break_set,const char *quote,const char escape,
744% char *breaker,int *next,char *quoted)
745%
746% A description of each parameter follows:
747%
748% o flag: right now, only the low order 2 bits are used.
749%
750% 1 => convert non-quoted tokens to upper case
751% 2 => convert non-quoted tokens to lower case
752% 0 => do not convert non-quoted tokens
753%
754% o token: a character string containing the returned next token
755%
756% o max_token_length: the maximum size of "token". Characters beyond
757% "max_token_length" are truncated.
758%
759% o line: the string to be parsed.
760%
761% o white: a string of the valid white spaces. example:
762%
763% char whitesp[]={" \t"};
764%
765% blank and tab will be valid white space.
766%
767% o break: a string of the valid break characters. example:
768%
769% char breakch[]={";,"};
770%
771% semicolon and comma will be valid break characters.
772%
773% o quote: a string of the valid quote characters. An example would be
774%
775% char quote[]={"'\""};
776%
777% (this causes single and double quotes to be valid) Note that a
778% token starting with one of these characters needs the same quote
779% character to terminate it.
780%
781% for example:
782%
783% "ABC '
784%
785% is unterminated, but
786%
787% "DEF" and 'GHI'
788%
789% are properly terminated. Note that different quote characters
790% can appear on the same line; only for a given token do the quote
791% characters have to be the same.
792%
793% o escape: the escape character (NOT a string ... only one
794% allowed). Use zero if none is desired.
795%
796% o breaker: the break character used to terminate the current
797% token. If the token was quoted, this will be the quote used. If
798% the token is the last one on the line, this will be zero.
799%
800% o next: this variable points to the first character of the
801% next token. it gets reset by "tokenizer" as it steps through the
802% string. Set it to 0 upon initialization, and leave it alone
803% after that. You can change it if you want to jump around in the
804% string or re-parse from the beginning, but be careful.
805%
806% o quoted: set to True if the token was quoted and MagickFalse
807% if not. You may need this information (for example: in C, a
808% string with quotes around it is a character string, while one
809% without is an identifier).
810%
811% o result: 0 if we haven't reached EOS (end of string), and 1
812% if we have.
813%
814*/
815
/*
  Tokenizer() parser states (stored in TokenInfo.state).
*/
#define IN_WHITE 0  /* before a token: white space is being skipped */
#define IN_TOKEN 1  /* inside an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token */
#define IN_OZONE 3  /* token complete: skipping white space up to a breaker */
820
/*
  Return the zero-based position of the first occurrence of character `c` in
  `string`, or -1 when it does not occur.  The terminating NUL is never
  matched, and a NULL string is treated as empty.
*/
static ssize_t sindex(int c,const char *string)
{
  const char
    *p;

  if (string == (const char *) NULL)
    return(-1);
  for (p=string; *p != '\0'; p++)
    if (c == (int) (*p))
      return((ssize_t) (p-string));
  return(-1);
}
831
832static void StoreToken(TokenInfo *token_info,char *string,
833 size_t max_token_length,int c)
834{
835 ssize_t
836 i;
837
838 if ((token_info->offset < 0) ||
839 ((size_t) token_info->offset >= (max_token_length-1)))
840 return;
841 i=token_info->offset++;
842 string[i]=(char) c;
843 if (token_info->state == IN_QUOTE)
844 return;
845 switch (token_info->flag & 0x03)
846 {
847 case 1:
848 {
849 string[i]=(char) LocaleToUppercase(c);
850 break;
851 }
852 case 2:
853 {
854 string[i]=(char) LocaleToLowercase(c);
855 break;
856 }
857 default:
858 break;
859 }
860}
861
862MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
863 char *token,const size_t max_token_length,const char *line,const char *white,
864 const char *break_set,const char *quote,const char escape,char *breaker,
865 int *next,char *quoted)
866{
867 int
868 c;
869
870 ssize_t
871 i;
872
873 *breaker='\0';
874 *quoted='\0';
875 if (line[*next] == '\0')
876 return(1);
877 token_info->state=IN_WHITE;
878 token_info->quote=(char) MagickFalse;
879 token_info->flag=flag;
880 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
881 {
882 c=(int) line[*next];
883 i=sindex(c,break_set);
884 if (i >= 0)
885 {
886 switch (token_info->state)
887 {
888 case IN_WHITE:
889 case IN_TOKEN:
890 case IN_OZONE:
891 {
892 (*next)++;
893 *breaker=break_set[i];
894 token[token_info->offset]='\0';
895 return(0);
896 }
897 case IN_QUOTE:
898 {
899 StoreToken(token_info,token,max_token_length,c);
900 break;
901 }
902 }
903 continue;
904 }
905 i=sindex(c,quote);
906 if (i >= 0)
907 {
908 switch (token_info->state)
909 {
910 case IN_WHITE:
911 {
912 token_info->state=IN_QUOTE;
913 token_info->quote=quote[i];
914 *quoted=(char) MagickTrue;
915 break;
916 }
917 case IN_QUOTE:
918 {
919 if (quote[i] != token_info->quote)
920 StoreToken(token_info,token,max_token_length,c);
921 else
922 {
923 token_info->state=IN_OZONE;
924 token_info->quote='\0';
925 }
926 break;
927 }
928 case IN_TOKEN:
929 case IN_OZONE:
930 {
931 *breaker=(char) c;
932 token[token_info->offset]='\0';
933 return(0);
934 }
935 }
936 continue;
937 }
938 i=sindex(c,white);
939 if (i >= 0)
940 {
941 switch (token_info->state)
942 {
943 case IN_WHITE:
944 case IN_OZONE:
945 break;
946 case IN_TOKEN:
947 {
948 token_info->state=IN_OZONE;
949 break;
950 }
951 case IN_QUOTE:
952 {
953 StoreToken(token_info,token,max_token_length,c);
954 break;
955 }
956 }
957 continue;
958 }
959 if (c == (int) escape)
960 {
961 if (line[(*next)+1] == '\0')
962 {
963 *breaker='\0';
964 StoreToken(token_info,token,max_token_length,c);
965 (*next)++;
966 token[token_info->offset]='\0';
967 return(0);
968 }
969 switch (token_info->state)
970 {
971 case IN_WHITE:
972 {
973 (*next)--;
974 token_info->state=IN_TOKEN;
975 break;
976 }
977 case IN_TOKEN:
978 case IN_QUOTE:
979 {
980 (*next)++;
981 c=(int) line[*next];
982 StoreToken(token_info,token,max_token_length,c);
983 break;
984 }
985 case IN_OZONE:
986 {
987 token[token_info->offset]='\0';
988 return(0);
989 }
990 }
991 continue;
992 }
993 switch (token_info->state)
994 {
995 case IN_WHITE:
996 {
997 token_info->state=IN_TOKEN;
998 StoreToken(token_info,token,max_token_length,c);
999 break;
1000 }
1001 case IN_TOKEN:
1002 case IN_QUOTE:
1003 {
1004 StoreToken(token_info,token,max_token_length,c);
1005 break;
1006 }
1007 case IN_OZONE:
1008 {
1009 token[token_info->offset]='\0';
1010 return(0);
1011 }
1012 }
1013 }
1014 token[token_info->offset]='\0';
1015 return(0);
1016}