ICU 50.1.2  50.1.2
messagepattern.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 * Copyright (C) 2011-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: messagepattern.h
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2011mar14
12 * created by: Markus W. Scherer
13 */
14 
15 #ifndef __MESSAGEPATTERN_H__
16 #define __MESSAGEPATTERN_H__
17 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_FORMATTING
26 
27 #include "unicode/parseerr.h"
28 #include "unicode/unistr.h"
29 
88 };
93 
203 };
208 
256 };
261 
// Evaluates to true when argType is one of the two plural-style argument types,
// UMSGPAT_ARG_TYPE_PLURAL or UMSGPAT_ARG_TYPE_SELECTORDINAL.
// The macro argument appears twice in the expansion, so it may be evaluated twice:
// pass an expression without side effects.
267 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
268  ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
269 
270 enum {
277 
286 };
287 
// Sentinel constant: the value -123456789 converted to double.
// NOTE(review): by its name this marks "no numeric value"; which functions return it
// is not visible in this header excerpt - confirm against the implementation.
294 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
295 
297 
// Forward declarations of two helper list classes. This header only declares
// pointers to them, so their full definitions are not needed here.
298 class MessagePatternDoubleList;
299 class MessagePatternPartsList;
300 
358 public:
// Constructs a MessagePattern without any pattern text.
// errorCode is the ICU error-code in/out parameter used instead of exceptions.
// NOTE(review): the apostrophe mode chosen by this constructor is not visible in this header.
367  MessagePattern(UErrorCode &errorCode);
368 
379 
// Constructs a MessagePattern from a pattern string.
// parseError is a pointer to a UParseError struct, which carries detailed information
// about parsing errors; errorCode receives the error status.
// NOTE(review): presumably the pattern is parsed immediately and parseError may be NULL -
// confirm in the implementation.
398  MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
399 
// Copy constructor. Note that it has no UErrorCode parameter.
405  MessagePattern(const MessagePattern &other);
406 
// Copy assignment operator; returns a MessagePattern reference.
413  MessagePattern &operator=(const MessagePattern &other);
414 
// Virtual destructor.
419  virtual ~MessagePattern();
420 
// The four public parse entry points below share one signature: each takes the pattern
// text, a UParseError pointer for detailed error information, and a UErrorCode reference,
// and each returns a MessagePattern reference.
// NOTE(review): presumably each returns *this and replaces any previously parsed state -
// confirm in the implementation.

// Parses a pattern string.
438  MessagePattern &parse(const UnicodeString &pattern,
439  UParseError *parseError, UErrorCode &errorCode);
440 
// NOTE(review): by its name, parses a ChoiceFormat-style pattern - confirm.
458  MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
459  UParseError *parseError, UErrorCode &errorCode);
460 
// NOTE(review): by its name, parses a PluralFormat-style pattern - confirm.
478  MessagePattern &parsePluralStyle(const UnicodeString &pattern,
479  UParseError *parseError, UErrorCode &errorCode);
480 
// NOTE(review): by its name, parses a SelectFormat-style pattern - confirm.
498  MessagePattern &parseSelectStyle(const UnicodeString &pattern,
499  UParseError *parseError, UErrorCode &errorCode);
500 
// Clears this MessagePattern; takes no arguments and returns nothing.
// NOTE(review): exactly which members are reset is defined in the implementation,
// not in this header.
506  void clear();
507 
515  clear();
516  aposMode=mode;
517  }
518 
// Equality comparison with another MessagePattern (declared here, defined elsewhere).
// operator!= is implemented as the negation of this operator.
524  UBool operator==(const MessagePattern &other) const;
525 
// Inequality comparison: the logical negation of operator==(other).
531  inline UBool operator!=(const MessagePattern &other) const {
532  return !(*this==other);
533  }
534 
// Returns a 32-bit hash code for this pattern (declared here, defined elsewhere).
// NOTE(review): presumably consistent with operator== - confirm in the implementation.
539  int32_t hashCode() const;
540 
546  return aposMode;
547  }
548 
549  // Java has package-private jdkAposMode() here.
550  // In C++, this is declared in the MessageImpl class.
551 
557  return msg;
558  }
559 
566  return hasArgNames;
567  }
568 
575  return hasArgNumbers;
576  }
577 
// Validates an argument name. Static: no MessagePattern instance is needed.
// The int32_t result distinguishes a name that is a valid "pattern identifier"
// from an invalid string.
// NOTE(review): the concrete return constants are declared in an enum that is not
// visible in this excerpt.
589  static int32_t validateArgumentName(const UnicodeString &name);
590 
// Returns a UnicodeString by value; declared const.
// NOTE(review): by its name this returns the pattern text with apostrophes auto-quoted -
// confirm in the implementation.
601  UnicodeString autoQuoteApostropheDeep() const;
602 
// Forward declaration of the nested Part class, so that the accessors declared before
// its definition can name it.
603  class Part;
604 
// Returns the number of "parts" created by parsing the pattern string
// (the current value of partsLength).
611  int32_t countParts() const {
612  return partsLength;
613  }
614 
// Gets the i-th pattern "part" as a const reference into the parts array.
// i is used to index the array directly, with no range check.
// NOTE(review): valid indexes are presumably 0..countParts()-1; anything else reads
// outside the array.
621  const Part &getPart(int32_t i) const {
622  return parts[i];
623  }
624 
633  return getPart(i).type;
634  }
635 
// Returns the pattern index of the specified pattern "part".
// partIndex selects the entry in the parts array; it is not range-checked.
643  int32_t getPatternIndex(int32_t partIndex) const {
644  return parts[partIndex].index;
645  }
646 
// Returns the substring of the pattern string indicated by the Part:
// part.length code units of msg starting at part.index.
// The result comes from UnicodeString::tempSubString(), which creates a
// temporary substring for the specified range.
654  UnicodeString getSubstring(const Part &part) const {
655  return msg.tempSubString(part.index, part.length);
656  }
657 
// Compares the part's substring (part.length units of msg starting at part.index)
// with the input string s. The result is true when msg.compare() reports 0.
665  UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
666  return msg.compare(part.index, part.length, s)==0;
667  }
668 
// Returns a double associated with the given part (declared here, defined elsewhere).
// NOTE(review): presumably returns UMSGPAT_NO_NUMERIC_VALUE when the part has no
// numeric value - confirm in the implementation.
675  double getNumericValue(const Part &part) const;
676 
// Returns a double for the plural argument whose parts begin at index pluralStart.
// NOTE(review): by its name this is the plural "offset" value; the default when no
// offset is present is not visible here.
683  double getPluralOffset(int32_t pluralStart) const;
684 
// Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START part
// at start. If the limitPartIndex stored in that part is smaller than start,
// start itself is returned instead.
693  int32_t getLimitPartIndex(int32_t start) const {
694  const int32_t storedLimit=getPart(start).limitPartIndex;
695  return storedLimit<start ? start : storedLimit;
696  }
700 
708  class Part : public UMemory {
709  public:
// Default constructor, do not use. The body is empty, so the integer fields of a
// default-constructed Part are left uninitialized.
714  Part() {}
715 
722  return type;
723  }
724 
// Returns the pattern string index associated with this Part.
730  int32_t getIndex() const {
731  return index;
732  }
733 
// Returns the length of the pattern substring associated with this Part.
// The field is stored as uint16_t and widened to int32_t on return.
740  int32_t getLength() const {
741  return length;
742  }
743 
// Returns the pattern string limit (exclusive-end) index associated with this Part,
// computed as index+length.
750  int32_t getLimit() const {
751  return index+length;
752  }
753 
// Returns a value associated with this part.
// The field is stored as int16_t and widened to int32_t on return.
760  int32_t getValue() const {
761  return value;
762  }
763 
771  UMessagePatternPartType type=getType();
773  return (UMessagePatternArgType)value;
774  } else {
775  return UMSGPAT_ARG_TYPE_NONE;
776  }
777  }
778 
788  }
789 
// Equality comparison with another Part (declared here, defined elsewhere).
// NOTE(review): which fields take part in the comparison is not visible in this header.
795  UBool operator==(const Part &other) const;
796 
// Inequality comparison: the logical negation of operator==(other).
802  inline UBool operator!=(const Part &other) const {
803  return !(*this==other);
804  }
805 
// Returns a hash code combining type, index, length and value with multiplier 37.
// The arithmetic is done in uint32_t so that large index values wrap around with
// well-defined modular semantics; the same expression in signed int would be
// undefined behaviour on overflow. On two's-complement targets the result has the
// same bit pattern as the wrapped signed computation.
810  int32_t hashCode() const {
811  return (int32_t)(((((uint32_t)type*37u+(uint32_t)index)*37u+(uint32_t)length)*37u)+(uint32_t)(int32_t)value);
812  }
813 
814  private:
815  friend class MessagePattern;
816 
// Upper bounds matching the widths of the length and value fields declared below:
// 0xffff is the largest uint16_t and 0x7fff is the largest int16_t.
817  static const int32_t MAX_LENGTH=0xffff;
818  static const int32_t MAX_VALUE=0x7fff;
819 
820  // Some fields are not final because they are modified during pattern parsing.
821  // After pattern parsing, the parts are effectively immutable.
// Pattern string index of this part (returned by getIndex()).
823  int32_t index;
// Length of the pattern substring (returned by getLength()); 16 bits wide.
824  uint16_t length;
// Value associated with this part (returned by getValue()); 16 bits wide, signed.
825  int16_t value;
// Read by MessagePattern::getLimitPartIndex() to find the matching LIMIT part.
826  int32_t limitPartIndex;
827  };
828 
829 private:
// Private parsing helpers (declared here, defined elsewhere).
// The int32_t-returning helpers take a pattern index as their first int32_t argument.
// NOTE(review): presumably each returns the index just past what it consumed - confirm
// in the implementation.

// NOTE(review): by their names, preParse() and postParse() run before and after
// the actual parsing - confirm.
830  void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
831 
832  void postParse();
833 
// Takes the nesting level and the parent argument type in addition to the index.
834  int32_t parseMessage(int32_t index, int32_t msgStartLength,
835  int32_t nestingLevel, UMessagePatternArgType parentType,
836  UParseError *parseError, UErrorCode &errorCode);
837 
838  int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
839  UParseError *parseError, UErrorCode &errorCode);
840 
841  int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
842 
// Private index-based overload; distinct from the public parseChoiceStyle(pattern, ...).
843  int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
844  UParseError *parseError, UErrorCode &errorCode);
845 
// One helper for several argument types; argType selects which style is being parsed.
846  int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
847  UParseError *parseError, UErrorCode &errorCode);
848 
// Static overload: operates on the range [start, limit) of an arbitrary string s.
// NOTE(review): the return value for a non-numeric range is not visible in this header.
857  static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
858 
// Convenience overload: forwards to the static version using this object's
// pattern string msg.
859  int32_t parseArgNumber(int32_t start, int32_t limit) {
860  return parseArgNumber(msg, start, limit);
861  }
862 
// Handles a numeric substring given by start and limit; returns nothing, so errors are
// reported through parseError and errorCode.
// allowInfinity is a flag.
// NOTE(review): presumably it controls whether an infinity value is accepted, and the
// parsed number is recorded as a part - confirm in the implementation.
871  void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
872  UParseError *parseError, UErrorCode &errorCode);
873 
874  // Java has package-private appendReducedApostrophes() here.
875  // In C++, this is declared in the MessageImpl class.
876 
// Scanning helpers: each takes a pattern index and returns an int32_t.
// NOTE(review): presumably the index after the skipped text - confirm.
877  int32_t skipWhiteSpace(int32_t index);
878 
879  int32_t skipIdentifier(int32_t index);
880 
885  int32_t skipDouble(int32_t index);
886 
// Static character test on a single code point (UChar32).
887  static UBool isArgTypeChar(UChar32 c);
888 
// Keyword tests at a pattern index.
// NOTE(review): presumably they check for the argument-type keywords named in the
// function names - confirm.
889  UBool isChoice(int32_t index);
890 
891  UBool isPlural(int32_t index);
892 
893  UBool isSelect(int32_t index);
894 
895  UBool isOrdinal(int32_t index);
896 
// Context tests based on the nesting level (and parent argument type).
901  UBool inMessageFormatPattern(int32_t nestingLevel);
902 
907  UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
908 
// Part-recording helpers (declared here, defined elsewhere). They return nothing;
// the three add* functions report failures through errorCode.
// The type/index/length/value arguments correspond to the fields of Part.
909  void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
910  int32_t value, UErrorCode &errorCode);
911 
// Like addPart() but with an extra start argument.
// NOTE(review): presumably the index of the matching START part, whose limitPartIndex
// gets linked to the new part - confirm.
912  void addLimitPart(int32_t start,
913  UMessagePatternPartType type, int32_t index, int32_t length,
914  int32_t value, UErrorCode &errorCode);
915 
// Takes a double value together with a start and length.
916  void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
917 
// Takes the caller's UParseError pointer and a pattern index.
// NOTE(review): presumably fills in the error context around index and tolerates a
// NULL pointer - confirm.
918  void setParseError(UParseError *parseError, int32_t index);
919 
920  // No ICU "poor man's RTTI" for this class nor its subclasses.
// Declared in the private section, so it is not callable on a MessagePattern from
// outside the class.
921  virtual UClassID getDynamicClassID() const;
922 
// Internal setup and copy helpers; both return a UBool and take an error code.
// NOTE(review): presumably the UBool signals success - confirm.
923  UBool init(UErrorCode &errorCode);
924  UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
925 
// The pattern string; getSubstring() and partSubstringMatches() read from it.
927  UnicodeString msg;
928  // ArrayList<Part> parts=new ArrayList<Part>();
// NOTE(review): presumably partsList owns the storage and parts points into it - confirm.
929  MessagePatternPartsList *partsList;
// Array indexed by getPart(); partsLength is the count returned by countParts().
930  Part *parts;
931  int32_t partsLength;
932  // ArrayList<Double> numericValues;
// NOTE(review): presumably the same owner/array/count arrangement as above, for
// double values - confirm.
933  MessagePatternDoubleList *numericValuesList;
934  double *numericValues;
935  int32_t numericValuesLength;
// Flags for whether the parsed pattern has named arguments like {first_name}
// and numbered arguments like {2}.
936  UBool hasArgNames;
937  UBool hasArgNumbers;
// NOTE(review): by its name, records that auto-quoting is needed - confirm.
938  UBool needsAutoQuoting;
939 };
940 
942 
943 #endif // !UCONFIG_NO_FORMATTING
944 
945 #endif // __MESSAGEPATTERN_H__
The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
An integer value, for example the offset or an explicit selector value in a PluralFormat style...
A numeric value, for example the offset or an explicit selector value in a PluralFormat style...
const Part & getPart(int32_t i) const
Gets the i-th pattern "part".
A literal apostrophe is represented by either a single or a double apostrophe pattern character...
The argument name.
int32_t getLimitPartIndex(int32_t start) const
Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start...
int32_t getIndex() const
Returns the pattern string index associated with this Part.
UBool operator!=(const MessagePattern &other) const
virtual UClassID getDynamicClassID() const =0
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
C++ API: Unicode String.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
The argument has no specified type.
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const
Compares the part's substring with the input string s.
int32_t getLimit() const
Returns the pattern string limit (exclusive-end) index associated with this Part. ...
UBool hasNumberedArguments() const
Does the parsed pattern have numbered arguments like {2}?
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
The argument style text.
UBool hasNamedArguments() const
Does the parsed pattern have named arguments like {first_name}?
static UBool hasNumericValue(UMessagePatternPartType type)
Indicates whether the Part type has a numeric value.
UMessagePatternPartType
MessagePattern::Part type constants.
The argument number, provided by the value.
The argument is an ordinal-number PluralFormat with the same style parts sequence and semantics as UM...
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)
Clears this MessagePattern and sets the UMessagePatternApostropheMode.
Parses and represents ICU MessageFormat patterns.
The argument is a ChoiceFormat with one or more ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
UBool operator!=(const Part &other) const
const UnicodeString & getPatternString() const
UMessagePatternApostropheMode getApostropheMode() const
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
Return value from MessagePattern.validateArgumentName() for when the string is a valid "pattern ident...
int32_t countParts() const
Returns the number of "parts" created by parsing the pattern string.
A literal apostrophe must be represented by a double apostrophe pattern character.
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:298
Indicates that a syntax character needs to be inserted for auto-quoting.
UnicodeString getSubstring(const Part &part) const
Returns the substring of the pattern string indicated by the Part.
int32_t getPatternIndex(int32_t partIndex) const
Returns the pattern index of the specified pattern "part".
Return value from MessagePattern.validateArgumentName() for when the string is invalid.
Start of a message pattern (main or nested).
End of an argument.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
C API: Parse Error Information.
End of a message pattern (main or nested).
A selector substring in a "complex" argument style.
Indicates a substring of the pattern string which is to be skipped when formatting.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
Part()
Default constructor, do not use.
A message pattern "part", representing a pattern parsing event.
Indicates a syntactic (non-escaped) # symbol in a plural variant.
int32_t getValue() const
Returns a value associated with this part.
The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset (e...
The argument type.
UMessagePatternArgType
Argument type constants.
UMessagePatternPartType getPartType(int32_t i) const
Returns the UMessagePatternPartType of the i-th pattern "part".
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside...
Definition: utypes.h:357
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:246
int32_t getLength() const
Returns the length of the pattern substring associated with this Part.
The argument has a "simple" type which is provided by the ARG_TYPE part.
Start of an argument.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:229
UMemory is the common ICU base class.
Definition: uobject.h:115
UMessagePatternApostropheMode
Mode for when an apostrophe starts quoted literal text for MessageFormat output.
UMessagePatternPartType getType() const
Returns the type of this part.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:200
int32_t hashCode() const
UMessagePatternArgType getArgType() const
Returns the argument type if this part is of type ARG_START or ARG_LIMIT, otherwise UMSGPAT_ARG_TYPE_...