MagickCore 6.9.13
Loading...
Searching...
No Matches
token.c
1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% %
6% TTTTT OOO K K EEEEE N N %
7% T O O K K E NN N %
8% T O O KKK EEE N N N %
9% T O O K K E N NN %
10% T OOO K K EEEEE N N %
11% %
12% %
13% MagickCore Token Methods %
14% %
15% Software Design %
16% Cristy %
17% January 1993 %
18% %
19% %
20% Copyright 1999 ImageMagick Studio LLC, a non-profit organization %
21% dedicated to making software imaging solutions freely available. %
22% %
23% You may not use this file except in compliance with the License. You may %
24% obtain a copy of the License at %
25% %
26% https://imagemagick.org/license/ %
27% %
28% Unless required by applicable law or agreed to in writing, software %
29% distributed under the License is distributed on an "AS IS" BASIS, %
30% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
31% See the License for the specific language governing permissions and %
32% limitations under the License. %
33% %
34%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35%
36%
37%
38*/
39
40/*
41 Include declarations.
42*/
43#include "magick/studio.h"
44#include "magick/exception.h"
45#include "magick/exception-private.h"
46#include "magick/image.h"
47#include "magick/image-private.h"
48#include "magick/locale-private.h"
49#include "magick/memory_.h"
50#include "magick/string_.h"
51#include "magick/string-private.h"
52#include "magick/token.h"
53#include "magick/token-private.h"
54#include "magick/utility.h"
55
56/*
57 Typedef declarations.
58*/
60{
61 int
62 state;
63
64 MagickStatusType
65 flag;
66
67 ssize_t
68 offset;
69
70 char
71 quote;
72
73 size_t
74 signature;
75};
76
77/*
78%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
79% %
80% %
81% %
82% A c q u i r e T o k e n I n f o %
83% %
84% %
85% %
86%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87%
88% AcquireTokenInfo() allocates the TokenInfo structure.
89%
90% The format of the AcquireTokenInfo method is:
91%
92% TokenInfo *AcquireTokenInfo()
93%
94*/
95MagickExport TokenInfo *AcquireTokenInfo(void)
96{
97 TokenInfo
98 *token_info;
99
100 token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
101 if (token_info == (TokenInfo *) NULL)
102 ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
103 token_info->signature=MagickCoreSignature;
104 return(token_info);
105}
106
107/*
108%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109% %
110% %
111% %
112% D e s t r o y T o k e n I n f o %
113% %
114% %
115% %
116%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117%
118% DestroyTokenInfo() deallocates memory associated with a TokenInfo
119% structure.
120%
121% The format of the DestroyTokenInfo method is:
122%
123% TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
124%
125% A description of each parameter follows:
126%
127% o token_info: Specifies a pointer to a TokenInfo structure.
128%
129*/
130MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
131{
132 assert(token_info != (TokenInfo *) NULL);
133 assert(token_info->signature == MagickCoreSignature);
134 if (IsEventLogging() != MagickFalse)
135 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
136 token_info->signature=(~MagickCoreSignature);
137 token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
138 return(token_info);
139}
140
141/*
142%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
143% %
144% %
145% %
146+ G e t N e x t T o k e n %
147% %
148% %
149% %
150%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151%
152% GetNextToken() gets a token from the token stream. A token is defined as
153% a sequence of characters delimited by whitespace (e.g. clip-path), a
154% sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
155% parentheses (e.g. rgb(0,0,0)). GetNextToken() also recognizes these
156% separator characters: ':', '=', ',', and ';'. GetNextToken() returns the
157% number of characters consumed from start (including the whitespace skipped
157% before and after the token) plus one.
158%
159% The format of the GetNextToken method is:
160%
161% size_t GetNextToken(const char *magick_restrict start,
162% const char **magick_restrict end,const size_t extent,
163% char *magick_restrict token)
164%
165% A description of each parameter follows:
166%
167% o start: the start of the token sequence.
168%
169% o end: point to the end of the token sequence.
170%
171% o extent: maximum extent of the token.
172%
173% o token: copy the token to this buffer.
174%
175*/
176MagickExport magick_hot_spot size_t GetNextToken(
177 const char *magick_restrict start,const char **magick_restrict end,
178 const size_t extent,char *magick_restrict token)
179{
180 double
181 value;
182
183 char
184 *magick_restrict q;
185
186 const char
187 *magick_restrict p;
188
189 ssize_t
190 i;
191
192 assert(start != (const char *) NULL);
193 assert(token != (char *) NULL);
194 i=0;
195 p=start;
196 while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
197 p++;
198 switch (*p)
199 {
200 case '\0':
201 break;
202 case '"':
203 case '\'':
204 case '`':
205 case '{':
206 {
207 char
208 escape;
209
210 switch (*p)
211 {
212 case '"': escape='"'; break;
213 case '\'': escape='\''; break;
214 case '`': escape='\''; break;
215 case '{': escape='}'; break;
216 default: escape=(*p); break;
217 }
218 for (p++; *p != '\0'; p++)
219 {
220 if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
221 p++;
222 else
223 if (*p == escape)
224 {
225 p++;
226 break;
227 }
228 if (i < (ssize_t) (extent-1))
229 token[i++]=(*p);
230 if ((size_t) (p-start) >= (extent-1))
231 break;
232 }
233 break;
234 }
235 case '/':
236 {
237 if (i < (ssize_t) (extent-1))
238 token[i++]=(*p);
239 p++;
240 if ((*p == '>') || (*p == '/'))
241 {
242 if (i < (ssize_t) (extent-1))
243 token[i++]=(*p);
244 p++;
245 }
246 break;
247 }
248 default:
249 {
250 char
251 *q;
252
253 value=StringToDouble(p,&q);
254 (void) value;
255 if ((p != q) && (*p != ','))
256 {
257 for ( ; (p < q) && (*p != ','); p++)
258 {
259 if (i < (ssize_t) (extent-1))
260 token[i++]=(*p);
261 if ((size_t) (p-start) >= (extent-1))
262 break;
263 }
264 if (*p == '%')
265 {
266 if (i < (ssize_t) (extent-1))
267 token[i++]=(*p);
268 p++;
269 }
270 break;
271 }
272 if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
273 (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
274 {
275 if (i < (ssize_t) (extent-1))
276 token[i++]=(*p);
277 p++;
278 break;
279 }
280 for ( ; *p != '\0'; p++)
281 {
282 if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
283 (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
284 break;
285 if ((i > 0) && (*p == '<'))
286 break;
287 if (i < (ssize_t) (extent-1))
288 token[i++]=(*p);
289 if (*p == '>')
290 break;
291 if (*p == '(')
292 {
293 for (p++; *p != '\0'; p++)
294 {
295 if (i < (ssize_t) (extent-1))
296 token[i++]=(*p);
297 if ((*p == ')') && (*(p-1) != '\\'))
298 break;
299 if ((size_t) (p-start) >= (extent-1))
300 break;
301 }
302 if (*p == '\0')
303 break;
304 }
305 if ((size_t) (p-start) >= (extent-1))
306 break;
307 }
308 break;
309 }
310 }
311 token[i]='\0';
312 if (LocaleNCompare(token,"url(#",5) == 0)
313 {
314 q=strrchr(token,')');
315 if (q != (char *) NULL)
316 {
317 *q='\0';
318 (void) memmove(token,token+5,(size_t) (q-token-4));
319 }
320 }
321 while (isspace((int) ((unsigned char) *p)) != 0)
322 p++;
323 if (end != (const char **) NULL)
324 *end=(const char *) p;
325 return(p-start+1);
326}
327
328/*
329%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
330% %
331% %
332% %
333% G l o b E x p r e s s i o n %
334% %
335% %
336% %
337%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
338%
339% GlobExpression() returns MagickTrue if the expression matches the pattern.
340%
341% The format of the GlobExpression function is:
342%
343% MagickBooleanType GlobExpression(const char *magick_restrict expression,
344% const char *magick_restrict pattern,
345% const MagickBooleanType case_insensitive)
346%
347% A description of each parameter follows:
348%
349% o expression: Specifies a pointer to a text string containing a file name.
350%
351% o pattern: Specifies a pointer to a text string containing a pattern.
352%
353% o case_insensitive: set to MagickTrue to ignore the case when matching
354% an expression.
355%
356*/
357
358static MagickBooleanType GlobExpression_(const char *magick_restrict expression,
359 const char *magick_restrict pattern,const MagickBooleanType case_insensitive,
360 const size_t depth)
361{
362 if (depth > MagickMaxRecursionDepth)
363 {
364 errno=EOVERFLOW;
365 return(MagickFalse);
366 }
367 /*
368 Empty pattern or single '*' always matches.
369 */
370 if (pattern == (const char *) NULL)
371 return(MagickTrue);
372 if (GetUTFCode(pattern) == 0)
373 return(MagickTrue);
374 if ((GetUTFCode(pattern) == '*') &&
375 (GetUTFCode(pattern+GetUTFOctets(pattern)) == 0))
376 return(MagickTrue);
377 if ((strchr(pattern,'{') == NULL) &&
378 (strchr(pattern,'*') == NULL) &&
379 (strchr(pattern,'?') == NULL))
380 {
381 char
382 path[MagickPathExtent]= { 0 };
383
384 /*
385 If no glob characters exist, ensure no subimage specifier.
386 */
387 GetPathComponent(pattern,SubimagePath,path);
388 if (*path != '\0')
389 return(MagickFalse);
390 }
391 while (GetUTFCode(pattern) != 0)
392 {
393 int
394 ecode = GetUTFCode(expression),
395 pcode = GetUTFCode(pattern);
396
397 if ((ecode == 0) && (pcode != '*') && (pcode != '{'))
398 break;
399 switch (pcode)
400 {
401 case '*':
402 {
403 do
404 {
405 /*
406 Skip consecutive '*'.
407 */
408 pattern+=GetUTFOctets(pattern);
409 }
410 while (GetUTFCode(pattern) == '*');
411 while (1)
412 {
413 /*
414 Try to match at each position.
415 */
416 if (GlobExpression_(expression,pattern,case_insensitive,depth+1) != MagickFalse)
417 {
418 /*
419 Consume rest of expression and pattern.
420 */
421 while (GetUTFCode(expression) != 0)
422 expression+=GetUTFOctets(expression);
423 while (GetUTFCode(pattern) != 0)
424 pattern+=GetUTFOctets(pattern);
425 return(MagickTrue);
426 }
427 if (GetUTFCode(expression) == 0)
428 break;
429 expression+=GetUTFOctets(expression);
430 }
431 return(MagickFalse);
432 }
433 case '?':
434 {
435 if (ecode == 0)
436 return(MagickFalse);
437 pattern+=GetUTFOctets(pattern);
438 expression+=GetUTFOctets(expression);
439 break;
440 }
441 case '[':
442 {
443 const char
444 *p = pattern+GetUTFOctets(pattern),
445 *q = pattern+GetUTFOctets(pattern);
446
447 MagickBooleanType
448 matched = MagickFalse;
449
450 if (ecode == 0)
451 return(MagickFalse);
452 while ((GetUTFCode(q) != 0) && (GetUTFCode(q) != ']'))
453 q+=GetUTFOctets(q);
454 if (GetUTFCode(q) == 0)
455 return(MagickFalse); /* malformed */
456 while (p < q)
457 {
458 const char
459 *next;
460
461 int
462 code = GetUTFCode(p);
463
464 size_t
465 octets = GetUTFOctets(p);
466
467 if (code == '\\')
468 {
469 p+=octets;
470 code=GetUTFCode(p);
471 octets=GetUTFOctets(p);
472 }
473 next=p+octets;
474 if ((next < q) && (GetUTFCode(next) == '-'))
475 {
476 int
477 ncode;
478
479 next+=GetUTFOctets(next);
480 ncode=GetUTFCode(next);
481 if (ncode == '\\')
482 {
483 next+=GetUTFOctets(next);
484 ncode=GetUTFCode(next);
485 }
486 if ((ecode >= code) && (ecode <= ncode))
487 matched=MagickTrue;
488 p=next+GetUTFOctets(next);
489 }
490 else
491 {
492 if (ecode == code)
493 matched=MagickTrue;
494 p+=octets;
495 }
496 }
497 /*
498 Skip consecutive '*'.
499 */
500 if (matched == MagickFalse)
501 return(MagickFalse);
502 pattern=q+GetUTFOctets(q); /* skip ']' */
503 expression+=GetUTFOctets(expression);
504 break;
505 }
506 case '{':
507 {
508 char
509 *a,
510 *alternative;
511
512 const char
513 *p,
514 *q;
515
516 size_t
517 remaining = MagickPathExtent;
518
519 pattern+=GetUTFOctets(pattern); /* Skip '{' */
520 if (GetUTFCode(pattern) == 0)
521 return(MagickFalse);
522 /*
523 End of brace expression: append remaining pattern.
524 */
525 p=pattern;
526 while ((GetUTFCode(p) != 0) && (GetUTFCode(p) != '}'))
527 {
528#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
529 if (GetUTFCode(p) == '\\')
530 {
531 p+=GetUTFOctets(p);
532 if (GetUTFCode(p) == 0)
533 break;
534 }
535#endif
536 p+=GetUTFOctets(p);
537 }
538 if (GetUTFCode(p) != '}')
539 return(MagickFalse); /* malformed */
540 q=p+GetUTFOctets(p);
541 alternative=AcquireString(pattern);
542 a=alternative;
543 while (1)
544 {
545 int
546 code = GetUTFCode(pattern);
547
548 size_t
549 octets;
550
551 if ((code == 0) || (code == ',') || (code == '}'))
552 {
553 char
554 *subpattern;
555
556 MagickBooleanType
557 match;
558
559 /*
560 Try alternative as a full sub-pattern.
561 */
562 *a='\0';
563 subpattern=AcquireString(alternative);
564 if (ConcatenateString(&subpattern,q) == MagickFalse)
565 {
566 subpattern=DestroyString(subpattern);
567 alternative=DestroyString(alternative);
568 return(MagickFalse);
569 }
570 match=GlobExpression_(expression,subpattern,case_insensitive,
571 depth+1);
572 subpattern=DestroyString(subpattern);
573 if (match != MagickFalse)
574 {
575 /*
576 Consume rest of expression and pattern.
577 */
578 while (GetUTFCode(expression) != 0)
579 expression+=GetUTFOctets(expression);
580 pattern=q;
581 while (GetUTFCode(pattern) != 0)
582 pattern+=GetUTFOctets(pattern);
583 alternative=DestroyString(alternative);
584 return(MagickTrue);
585 }
586 /*
587 Reset buffer for next alternative.
588 */
589 a=alternative;
590 remaining=MagickPathExtent;
591 if (code == ',')
592 {
593 pattern+=GetUTFOctets(pattern); /* skip ',' */
594 continue;
595 }
596 break; /* '}' or end */
597 }
598 /*
599 Copy UTF-8 sequence into alternative.
600 */
601 octets=GetUTFOctets(pattern);
602 if ((octets == 0) || (octets >= remaining))
603 break;
604 (void) memcpy(a,pattern,octets);
605 a+=octets;
606 remaining-=octets;
607 pattern+=octets;
608 }
609 alternative=DestroyString(alternative);
610 return(MagickFalse);
611 }
612#if !defined(MAGICKCORE_WINDOWS_SUPPORT) || defined(__CYGWIN__)
613 case '\\':
614 {
615 pattern+=GetUTFOctets(pattern);
616 if (GetUTFCode(pattern) == 0)
617 return(MagickFalse);
618 magick_fallthrough;
619 }
620#endif
621 default:
622 {
623 int
624 ec = ecode,
625 pc = pcode;
626
627 if (ecode == 0)
628 return(MagickFalse);
629 if (case_insensitive != MagickFalse)
630 {
631 pc=LocaleToLowercase(pc);
632 ec=LocaleToLowercase(ec);
633 }
634 if (pc != ec)
635 return(MagickFalse);
636 pattern+=GetUTFOctets(pattern);
637 expression+=GetUTFOctets(expression);
638 break;
639 }
640 }
641 }
642 while (GetUTFCode(pattern) == '*')
643 pattern+=GetUTFOctets(pattern);
644 return(((GetUTFCode(expression) == 0) &&
645 (GetUTFCode(pattern) == 0)) ? MagickTrue : MagickFalse);
646}
647
648MagickExport MagickBooleanType GlobExpression(
649 const char *magick_restrict expression,const char *magick_restrict pattern,
650 const MagickBooleanType case_insensitive)
651{
652 return(GlobExpression_(expression,pattern,case_insensitive,0));
653}
654
655/*
656%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
657% %
658% %
659% %
660+ I s G l o b %
661% %
662% %
663% %
664%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
665%
666% IsGlob() returns MagickTrue if the path specification contains a globbing
667% pattern.
668%
669% The format of the IsGlob method is:
670%
671% MagickBooleanType IsGlob(const char *path)
672%
673% A description of each parameter follows:
674%
675% o path: the path.
676%
677*/
678MagickExport MagickBooleanType IsGlob(const char *path)
679{
680 MagickBooleanType
681 status = MagickFalse;
682
683 const char
684 *p;
685
686 if (IsPathAccessible(path) != MagickFalse)
687 return(MagickFalse);
688 for (p=path; *p != '\0'; p++)
689 {
690 switch (*p)
691 {
692 case '*':
693 case '?':
694 case '{':
695 case '}':
696 case '[':
697 case ']':
698 {
699 status=MagickTrue;
700 break;
701 }
702 default:
703 break;
704 }
705 }
706 return(status);
707}
708
709/*
710%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
711% %
712% %
713% %
714% I s M a g i c k T r u e %
715% %
716% %
717% %
718%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
719%
720% IsMagickTrue() returns MagickTrue if the value is "true", "on", "yes" or
721% "1".
722%
723% The format of the IsMagickTrue method is:
724%
725% MagickBooleanType IsMagickTrue(const char *value)
726%
727% A description of each parameter follows:
728%
729% o option: either MagickTrue or MagickFalse depending on the value
730% parameter.
731%
732% o value: Specifies a pointer to a character array.
733%
734*/
735MagickExport MagickBooleanType IsMagickTrue(const char *value)
736{
737 if (value == (const char *) NULL)
738 return(MagickFalse);
739 if (LocaleCompare(value,"true") == 0)
740 return(MagickTrue);
741 if (LocaleCompare(value,"on") == 0)
742 return(MagickTrue);
743 if (LocaleCompare(value,"yes") == 0)
744 return(MagickTrue);
745 if (LocaleCompare(value,"1") == 0)
746 return(MagickTrue);
747 return(MagickFalse);
748}
749
750/*
751%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
752% %
753% %
754% %
755% T o k e n i z e r %
756% %
757% %
758% %
759%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
760%
761% Tokenizer() is a generalized, finite state token parser. It extracts tokens
762% one at a time from a string of characters. The characters used for white
763% space, for break characters, and for quotes can be specified. Also,
764% characters in the string can be preceded by a specifiable escape character
765% which removes any special meaning the character may have.
766%
767% Here is some terminology:
768%
769% o token: A single unit of information in the form of a group of
770% characters.
771%
772% o white space: Space that gets ignored (except within quotes or when
773% escaped), like blanks and tabs. In addition, white space terminates a
774% non-quoted token.
775%
776% o break set: One or more characters that separates non-quoted tokens.
777% Commas are a common break character. The usage of break characters to
778% signal the end of a token is the same as that of white space, except
779% multiple break characters with nothing or only white space between
780% generate a null token for each two break characters together.
781%
782% For example, if blank is set to be the white space and comma is set to
783% be the break character, the line
784%
785% A, B, C , , DEF
786%
787% ... consists of 5 tokens:
788%
789% 1) "A"
790% 2) "B"
791% 3) "C"
792% 4) "" (the null string)
793% 5) "DEF"
794%
795% o Quote character: A character that, when surrounding a group of other
796% characters, causes the group of characters to be treated as a single
797% token, no matter how many white spaces or break characters exist in
798% the group. Also, a token always terminates after the closing quote.
799% For example, if ' is the quote character, blank is white space, and
800% comma is the break character, the following string
801%
802% A, ' B, CD'EF GHI
803%
804% ... consists of 4 tokens:
805%
806% 1) "A"
807% 2) " B, CD" (note the blanks & comma)
808% 3) "EF"
809% 4) "GHI"
810%
811% The quote characters themselves do not appear in the resultant
812% tokens. The double quotes are delimiters I use here for
813% documentation purposes only.
814%
815% o Escape character: A character which itself is ignored but which
816% causes the next character to be used as is. ^ and \ are often used
817% as escape characters. An escape in the last position of the string
818% gets treated as a "normal" (i.e., non-quote, non-white, non-break,
819% and non-escape) character. For example, assume white space, break
820% character, and quote are the same as in the above examples, and
821% further, assume that ^ is the escape character. Then, in the string
822%
823% ABC, ' DEF ^' GH' I ^ J K^ L ^
824%
825% ... there are 7 tokens:
826%
827% 1) "ABC"
828% 2) " DEF ' GH"
829% 3) "I"
830% 4) " " (a lone blank)
831% 5) "J"
832% 6) "K L"
833% 7) "^" (passed as is at end of line)
834%
835% The format of the Tokenizer method is:
836%
837% int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
838% const size_t max_token_length,const char *line,const char *white,
839% const char *break_set,const char *quote,const char escape,
840% char *breaker,int *next,char *quoted)
841%
842% A description of each parameter follows:
843%
844% o flag: right now, only the low order 2 bits are used.
845%
846% 1 => convert non-quoted tokens to upper case
847% 2 => convert non-quoted tokens to lower case
848% 0 => do not convert non-quoted tokens
849%
850% o token: a character string containing the returned next token
851%
852% o max_token_length: the maximum size of "token". Characters beyond
853% "max_token_length" are truncated.
854%
855% o line: the string to be parsed.
856%
857% o white: a string of the valid white spaces. example:
858%
859% char whitesp[]={" \t"};
860%
861% blank and tab will be valid white space.
862%
863% o break_set: a string of the valid break characters. example:
864%
865% char breakch[]={";,"};
866%
867% semicolon and comma will be valid break characters.
868%
869% o quote: a string of the valid quote characters. An example would be
870%
871% char quotech[]={"'\""};
872%
873% (this causes single and double quotes to be valid) Note that a
874% token starting with one of these characters needs the same quote
875% character to terminate it.
876%
877% for example:
878%
879% "ABC '
880%
881% is unterminated, but
882%
883% "DEF" and 'GHI'
884%
885% are properly terminated. Note that different quote characters
886% can appear on the same line; only for a given token do the quote
887% characters have to be the same.
888%
889% o escape: the escape character (NOT a string ... only one
890% allowed). Use zero if none is desired.
891%
892% o breaker: the break character used to terminate the current
893% token. If the token was quoted, this will be the quote used. If
894% the token is the last one on the line, this will be zero.
895%
896% o next: this variable points to the first character of the
897% next token. it gets reset by "tokenizer" as it steps through the
898% string. Set it to 0 upon initialization, and leave it alone
899% after that. You can change it if you want to jump around in the
900% string or re-parse from the beginning, but be careful.
901%
902% o quoted: set to MagickTrue if the token was quoted and MagickFalse
903% if not. You may need this information (for example: in C, a
904% string with quotes around it is a character string, while one
905% without is an identifier).
906%
907% o result: 0 if we haven't reached EOS (end of string), and 1
908% if we have.
909%
910*/
911
/*
  Scanner states stored in TokenInfo.state by Tokenizer().
*/
enum
{
  IN_WHITE = 0,  /* no token character has been seen yet */
  IN_TOKEN = 1,  /* inside an unquoted token */
  IN_QUOTE = 2,  /* inside a quoted token */
  IN_OZONE = 3   /* token finished; waiting for a break or the next token */
};
916
/*
  sindex() returns the zero-based position of the first character in `string`
  whose value equals `c`, or -1 when there is none.  The terminating NUL is
  never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
927
928static void StoreToken(TokenInfo *token_info,char *string,
929 size_t max_token_length,int c)
930{
931 ssize_t
932 i;
933
934 if ((token_info->offset < 0) ||
935 ((size_t) token_info->offset >= (max_token_length-1)))
936 return;
937 i=token_info->offset++;
938 string[i]=(char) c;
939 if (token_info->state == IN_QUOTE)
940 return;
941 switch (token_info->flag & 0x03)
942 {
943 case 1:
944 {
945 string[i]=(char) LocaleToUppercase(c);
946 break;
947 }
948 case 2:
949 {
950 string[i]=(char) LocaleToLowercase(c);
951 break;
952 }
953 default:
954 break;
955 }
956}
957
958MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
959 char *token,const size_t max_token_length,const char *line,const char *white,
960 const char *break_set,const char *quote,const char escape,char *breaker,
961 int *next,char *quoted)
962{
963 int
964 c;
965
966 ssize_t
967 i;
968
969 *breaker='\0';
970 *quoted='\0';
971 if (line[*next] == '\0')
972 return(1);
973 token_info->state=IN_WHITE;
974 token_info->quote=(char) MagickFalse;
975 token_info->flag=flag;
976 for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
977 {
978 c=(int) line[*next];
979 i=sindex(c,break_set);
980 if (i >= 0)
981 {
982 switch (token_info->state)
983 {
984 case IN_WHITE:
985 case IN_TOKEN:
986 case IN_OZONE:
987 {
988 (*next)++;
989 *breaker=break_set[i];
990 token[token_info->offset]='\0';
991 return(0);
992 }
993 case IN_QUOTE:
994 {
995 StoreToken(token_info,token,max_token_length,c);
996 break;
997 }
998 }
999 continue;
1000 }
1001 i=sindex(c,quote);
1002 if (i >= 0)
1003 {
1004 switch (token_info->state)
1005 {
1006 case IN_WHITE:
1007 {
1008 token_info->state=IN_QUOTE;
1009 token_info->quote=quote[i];
1010 *quoted=(char) MagickTrue;
1011 break;
1012 }
1013 case IN_QUOTE:
1014 {
1015 if (quote[i] != token_info->quote)
1016 StoreToken(token_info,token,max_token_length,c);
1017 else
1018 {
1019 token_info->state=IN_OZONE;
1020 token_info->quote='\0';
1021 }
1022 break;
1023 }
1024 case IN_TOKEN:
1025 case IN_OZONE:
1026 {
1027 *breaker=(char) c;
1028 token[token_info->offset]='\0';
1029 return(0);
1030 }
1031 }
1032 continue;
1033 }
1034 i=sindex(c,white);
1035 if (i >= 0)
1036 {
1037 switch (token_info->state)
1038 {
1039 case IN_WHITE:
1040 case IN_OZONE:
1041 break;
1042 case IN_TOKEN:
1043 {
1044 token_info->state=IN_OZONE;
1045 break;
1046 }
1047 case IN_QUOTE:
1048 {
1049 StoreToken(token_info,token,max_token_length,c);
1050 break;
1051 }
1052 }
1053 continue;
1054 }
1055 if (c == (int) escape)
1056 {
1057 if (line[(*next)+1] == '\0')
1058 {
1059 *breaker='\0';
1060 StoreToken(token_info,token,max_token_length,c);
1061 (*next)++;
1062 token[token_info->offset]='\0';
1063 return(0);
1064 }
1065 switch (token_info->state)
1066 {
1067 case IN_WHITE:
1068 {
1069 (*next)--;
1070 token_info->state=IN_TOKEN;
1071 break;
1072 }
1073 case IN_TOKEN:
1074 case IN_QUOTE:
1075 {
1076 (*next)++;
1077 c=(int) line[*next];
1078 StoreToken(token_info,token,max_token_length,c);
1079 break;
1080 }
1081 case IN_OZONE:
1082 {
1083 token[token_info->offset]='\0';
1084 return(0);
1085 }
1086 }
1087 continue;
1088 }
1089 switch (token_info->state)
1090 {
1091 case IN_WHITE:
1092 {
1093 token_info->state=IN_TOKEN;
1094 StoreToken(token_info,token,max_token_length,c);
1095 break;
1096 }
1097 case IN_TOKEN:
1098 case IN_QUOTE:
1099 {
1100 StoreToken(token_info,token,max_token_length,c);
1101 break;
1102 }
1103 case IN_OZONE:
1104 {
1105 token[token_info->offset]='\0';
1106 return(0);
1107 }
1108 }
1109 }
1110 token[token_info->offset]='\0';
1111 return(0);
1112}