ICU 76.1  76.1
uniset.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 1999-2016, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ***************************************************************************
8 * Date Name Description
9 * 10/20/99 alan Creation.
10 ***************************************************************************
11 */
12 
13 #ifndef UNICODESET_H
14 #define UNICODESET_H
15 
16 #include "unicode/utypes.h"
17 
18 #if U_SHOW_CPLUSPLUS_API
19 
20 #include "unicode/ucpmap.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/unistr.h"
23 #include "unicode/uset.h"
24 
30 U_NAMESPACE_BEGIN
31 
32 // Forward Declarations.
33 class BMPSet;
34 class ParsePosition;
35 class RBBIRuleScanner;
36 class SymbolTable;
37 class UnicodeSetStringSpan;
38 class UVector;
39 class RuleCharacterIterator;
40 
// UnicodeSet -- per this page's Doxygen summary, "a mutable set of Unicode characters and
// multicharacter strings". Declared final and derived from UnicodeFilter.
// NOTE(review): this is a rendered listing, not the raw header. Doc comments are stripped
// and several numbered lines are blank or begin mid-declaration (e.g. 383, 393, 642, 664),
// so some members summarized elsewhere on this page (UnicodeSet(), the range and copy
// constructors, freeze(), cloneAsThawed(), applyPattern(), ...) are not visible here.
// Consult the real uniset.h before relying on this text.
285 class U_COMMON_API UnicodeSet final : public UnicodeFilter {
286 private:
// Size of the in-object stackList array and the initial value of `capacity`.
291  static constexpr int32_t INITIAL_CAPACITY = 25;
292  // fFlags constant
293  static constexpr uint8_t kIsBogus = 1; // This set is bogus (i.e. not valid)
294 
295  UChar32* list = stackList; // MUST be terminated with HIGH
296  int32_t capacity = INITIAL_CAPACITY; // capacity of list
297  int32_t len = 1; // length of list used; 1 <= len <= capacity
298  uint8_t fFlags = 0; // Bit flag (see constants above)
299 
300  BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not nullptr.
301  UChar32* buffer = nullptr; // internal buffer, may be nullptr
302  int32_t bufferCapacity = 0; // capacity of buffer
303 
// NOTE(review): presumably the pattern text (and its length) managed by
// setPattern()/releasePattern() declared near the end of the class -- confirm in uniset.cpp.
313  char16_t *pat = nullptr;
314  int32_t patLen = 0;
315 
316  UVector* strings_ = nullptr; // maintained in sorted order
// Second of the two pointers that isFrozen() tests (see the comment on bmpSet).
317  UnicodeSetStringSpan *stringSpan = nullptr;
318 
// Fixed-size array that `list` points to by default (see the initializer of `list`).
324  UChar32 stackList[INITIAL_CAPACITY];
325 
326 public:
// True when the kIsBogus bit is set in fFlags (inline definition follows the class).
336  inline UBool isBogus() const;
337 
// Page summary: "Make this UnicodeSet object invalid."
354  void setToBogus();
355 
356 public:
357 
// Lower and upper bound constants; the page documents complement() as equivalent to
// complement(MIN_VALUE, MAX_VALUE).
358  enum {
363  MIN_VALUE = 0,
364 
369  MAX_VALUE = 0x10ffff
370  };
371 
372  //----------------------------------------------------------------
373  // Constructors &c
374  //----------------------------------------------------------------
375 
376 public:
377 
383 
393 
// NOTE(review): the opening line of the enum that contains kSerialized is missing from this
// listing; presumably `enum ESerialization {`, given the constructor parameter type below
// -- confirm against the real header.
394 #ifndef U_HIDE_INTERNAL_API
399  kSerialized /* result of serialize() */
400  };
401 
// Page summary: "Constructs a set from the output of serialize()."
412  UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
413  ESerialization serialization, UErrorCode &status);
414 #endif /* U_HIDE_INTERNAL_API */
415 
// The three pattern constructors below share the page summary
// "Constructs a set from the given pattern."
424  UnicodeSet(const UnicodeString& pattern,
425  UErrorCode& status);
426 
427 #ifndef U_HIDE_INTERNAL_API
442  UnicodeSet(const UnicodeString& pattern,
443  uint32_t options,
444  const SymbolTable* symbols,
445  UErrorCode& status);
446 #endif /* U_HIDE_INTERNAL_API */
447 
463  UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
464  uint32_t options,
465  const SymbolTable* symbols,
466  UErrorCode& status);
467 
473 
478  virtual ~UnicodeSet();
479 
486 
// Page summary: "Compares the specified object with this set for equality."
498  virtual bool operator==(const UnicodeSet& o) const;
499 
// Defined inline after the class as the negation of operator==.
505  inline bool operator!=(const UnicodeSet& o) const;
506 
// Page summary: "Returns a copy of this object."
516  virtual UnicodeSet* clone() const override;
517 
// Page summary: "Returns the hash code value for this set."
525  virtual int32_t hashCode() const;
526 
// fromUSet()/toUSet(): pointer conversions between this class and USet, which the page
// describes as "the C API type corresponding to C++ class UnicodeSet". The inline
// definitions after the class are plain reinterpret_casts.
535  inline static UnicodeSet *fromUSet(USet *uset);
536 
545  inline static const UnicodeSet *fromUSet(const USet *uset);
546 
554  inline USet *toUSet();
555 
556 
564  inline const USet * toUSet() const;
565 
566 
567  //----------------------------------------------------------------
568  // Freezable API
569  //----------------------------------------------------------------
570 
// True iff bmpSet or stringSpan is non-null (inline definition follows the class).
579  inline UBool isFrozen() const;
580 
595 
605 
606  //----------------------------------------------------------------
607  // Public API
608  //----------------------------------------------------------------
609 
620 
// Page summary: returns true if the given position in the pattern appears to be the start
// of a UnicodeSet pattern.
626  static UBool resemblesPattern(const UnicodeString& pattern,
627  int32_t pos);
628 
// NOTE(review): the fragments from here through listing line 781 are the trailing parameter
// lines of declarations whose first line is missing from this listing. Judging by the
// member summaries on this page they presumably belong, in order, to the three
// applyPattern() overloads, toPattern(), applyIntPropertyValue() and applyPropertyAlias()
// -- confirm against the real header.
642  UErrorCode& status);
643 
644 #ifndef U_HIDE_INTERNAL_API
664  uint32_t options,
665  const SymbolTable* symbols,
666  UErrorCode& status);
667 #endif /* U_HIDE_INTERNAL_API */
668 
703  ParsePosition& pos,
704  uint32_t options,
705  const SymbolTable* symbols,
706  UErrorCode& status);
707 
722  UBool escapeUnprintable = false) const override;
723 
747  int32_t value,
748  UErrorCode& ec);
749 
780  const UnicodeString& value,
781  UErrorCode& ec);
782 
// Page summary: "Returns the number of elements in this set (its cardinality)."
795  virtual int32_t size() const;
796 
// Page summary: "Returns true if this set contains no elements."
803  virtual UBool isEmpty() const;
804 
809  UBool hasStrings() const;
810 
// contains(): membership tests for a single code point, for every code point of a range,
// and for a multicharacter string (per the page summaries).
818  virtual UBool contains(UChar32 c) const override;
819 
828  virtual UBool contains(UChar32 start, UChar32 end) const;
829 
837  UBool contains(const UnicodeString& s) const;
838 
// containsAll(): true if this set contains all elements of the given set, or all the
// characters of the given string (per the page summaries).
846  virtual UBool containsAll(const UnicodeSet& c) const;
847 
855  UBool containsAll(const UnicodeString& s) const;
856 
// containsNone(): true if this set contains none of the characters of the given range or
// none of the elements of the given set (per the page summaries).
865  UBool containsNone(UChar32 start, UChar32 end) const;
866 
874  UBool containsNone(const UnicodeSet& c) const;
875 
884 
// containsSome(): each overload is defined inline after the class as !containsNone(...).
893  inline UBool containsSome(UChar32 start, UChar32 end) const;
894 
902  inline UBool containsSome(const UnicodeSet& s) const;
903 
911  inline UBool containsSome(const UnicodeString& s) const;
912 
// span*/spanBack*: per the (truncated) page summaries, span and spanUTF8 return the length
// of an initial substring of the input, and spanBack and spanBackUTF8 return the start of a
// trailing substring, selected according to spanCondition. The UnicodeString overloads are
// inline wrappers (defined after the class) around the char16_t* overloads.
931  int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
932 
945  inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
946 
964  int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
965 
979  inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
980 
999  int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
1000 
1018  int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
1019 
// Page summary: "Implement UnicodeMatcher::matches()"
1024  virtual UMatchDegree matches(const Replaceable& text,
1025  int32_t& offset,
1026  int32_t limit,
1027  UBool incremental) override;
1028 
1029 private:
1052  static int32_t matchRest(const Replaceable& text,
1053  int32_t start, int32_t limit,
1054  const UnicodeString& s);
1055 
1065  int32_t findCodePoint(UChar32 c) const;
1066 
1067 public:
1068 
// Page summary: "Implementation of UnicodeMatcher API."
1076  virtual void addMatchSetTo(UnicodeSet& toUnionTo) const override;
1077 
// indexOf()/charAt(): per the page summaries, map between a character and its index within
// this set, with the set ordered by ascending code point.
1086  int32_t indexOf(UChar32 c) const;
1087 
1103  UChar32 charAt(int32_t index) const;
1104 
// Draft API, compiled out when U_HIDE_DRAFT_API is defined. Each accessor builds its result
// from this set's USet view (toUSet()). Per the page summaries, codePoints(), ranges() and
// strings() return C++ "ranges" over the code points, the code point ranges and the strings
// of this set, and begin() returns an iterator over all elements. end() has no summary on
// this page; it returns USetElements(toUSet()).end().
1105 #ifndef U_HIDE_DRAFT_API
1123  inline U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const {
1124  return U_HEADER_NESTED_NAMESPACE::USetCodePoints(toUSet());
1125  }
1126 
1149  inline U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const {
1150  return U_HEADER_NESTED_NAMESPACE::USetRanges(toUSet());
1151  }
1152 
1173  inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
1174  return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
1175  }
1176 
1200  inline U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const {
1201  return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).begin();
1202  }
1203 
1212  inline U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const {
1213  return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).end();
1214  }
1215 #endif // U_HIDE_DRAFT_API
1216 
// Page summary: "Adds the specified range to this set if it is not already present."
1231  virtual UnicodeSet& add(UChar32 start, UChar32 end);
1232 
1244 
1257 
1258  private:
1264  static int32_t getSingleCP(const UnicodeString& s);
1265 
1266  void _add(const UnicodeString& s);
1267 
1268  public:
1278 
1287 
1296 
1305 
// Page summary: "Makes a set from a multicharacter string."
1314  static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
1315 
1316 
// Page summary: "Makes a set from each of the characters in the string."
1324  static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
1325 
// Page summary: "Retain only the elements in this set that are contained in the specified range."
1337  virtual UnicodeSet& retain(UChar32 start, UChar32 end);
1338 
1339 
1349 
1361 
// Page summary: "Removes the specified range from this set if it is present."
1375  virtual UnicodeSet& remove(UChar32 start, UChar32 end);
1376 
1388 
1399 
1413 
// Page summary: "Complements the specified range in this set."
1426  virtual UnicodeSet& complement(UChar32 start, UChar32 end);
1427 
1439 
1450 
// Set-with-set operations; per the page summaries: addAll adds all elements of c,
// retainAll keeps only the elements also contained in c, removeAll removes the elements
// contained in c.
1463  virtual UnicodeSet& addAll(const UnicodeSet& c);
1464 
1476  virtual UnicodeSet& retainAll(const UnicodeSet& c);
1477 
1489  virtual UnicodeSet& removeAll(const UnicodeSet& c);
1490 
1502 
// Page summary: "Removes all of the elements from this set."
1509  virtual UnicodeSet& clear();
1510 
// Page summary: "Close this set over the given attribute."
1538  UnicodeSet& closeOver(int32_t attribute);
1539 
1547 
// Range iteration; per the page summaries: the number of ranges in this set, and the first
// and last character of the range at the given index.
1555  virtual int32_t getRangeCount() const;
1556 
1564  virtual UChar32 getRangeStart(int32_t index) const;
1565 
1573  virtual UChar32 getRangeEnd(int32_t index) const;
1574 
// Page summary: "Serializes this set into an array of 16-bit integers."
1623  int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
1624 
// Page summary: reallocates this object's internal structures to take up the least
// possible space.
1631  virtual UnicodeSet& compact();
1632 
// Page summary: "Return the class ID for this class."
1644  static UClassID U_EXPORT2 getStaticClassID();
1645 
// Page summary: "Implement UnicodeFunctor API."
1654  virtual UClassID getDynamicClassID() const override;
1655 
1656  private:
1657 
1658  // Private API for the USet API
1659 
1660  friend class USetAccess;
1661 
1662  const UnicodeString* getString(int32_t index) const;
1663 
1664  //----------------------------------------------------------------
1665  // RuleBasedTransliterator support
1666  //----------------------------------------------------------------
1667 
1668 private:
1669 
// Overrides the UnicodeFilter pure virtual of the same name, which the page summarizes as
// returning true if the matcher will match a character c where c & 0xFF == v.
1675  virtual UBool matchesIndexValue(uint8_t v) const override;
1676 
1677 private:
1678  friend class RBBIRuleScanner;
1679 
1680  //----------------------------------------------------------------
1681  // Implementation: Clone as thawed (see ICU4J Freezable)
1682  //----------------------------------------------------------------
1683 
1684  UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
1685  UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);
1686 
1687  //----------------------------------------------------------------
1688  // Implementation: Pattern parsing
1689  //----------------------------------------------------------------
1690 
1691  void applyPatternIgnoreSpace(const UnicodeString& pattern,
1692  ParsePosition& pos,
1693  const SymbolTable* symbols,
1694  UErrorCode& status);
1695 
// caseClosure is a pointer to a UnicodeSet member function taking an int32_t attribute and
// returning UnicodeSet& (the same shape as closeOver()).
1696  void applyPattern(RuleCharacterIterator& chars,
1697  const SymbolTable* symbols,
1698  UnicodeString& rebuiltPat,
1699  uint32_t options,
1700  UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
1701  int32_t depth,
1702  UErrorCode& ec);
1703 
1704  void closeOverCaseInsensitive(bool simple);
1705  void closeOverAddCaseMappings();
1706 
1707  //----------------------------------------------------------------
1708  // Implementation: Utility methods
1709  //----------------------------------------------------------------
1710 
1711  static int32_t nextCapacity(int32_t minCapacity);
1712 
1713  bool ensureCapacity(int32_t newLen);
1714 
1715  bool ensureBufferCapacity(int32_t newLen);
1716 
1717  void swapBuffers();
1718 
1719  UBool allocateStrings(UErrorCode &status);
1720  int32_t stringsSize() const;
1721  UBool stringsContains(const UnicodeString &s) const;
1722 
1723  UnicodeString& _toPattern(UnicodeString& result,
1724  UBool escapeUnprintable) const;
1725 
1726  UnicodeString& _generatePattern(UnicodeString& result,
1727  UBool escapeUnprintable) const;
1728 
// _appendToPat overloads: take a string, a single code point, or a start/end pair.
1729  static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
1730 
1731  static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
1732 
1733  static void _appendToPat(UnicodeString &result, UChar32 start, UChar32 end,
1734  UBool escapeUnprintable);
1735 
1736  //----------------------------------------------------------------
1737  // Implementation: Fundamental operators
1738  //----------------------------------------------------------------
1739 
// NOTE(review): these three take a raw UChar32 array plus its length, presumably in the
// same format as `list` -- confirm in uniset.cpp. The meaning of `polarity` is not visible
// in this listing.
1740  void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
1741 
1742  void add(const UChar32* other, int32_t otherLen, int8_t polarity);
1743 
1744  void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
1745 
1751  static UBool resemblesPropertyPattern(const UnicodeString& pattern,
1752  int32_t pos);
1753 
1754  static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
1755  int32_t iterOpts);
1756 
1796  UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
1797  ParsePosition& ppos,
1798  UErrorCode &ec);
1799 
1800  void applyPropertyPattern(RuleCharacterIterator& chars,
1801  UnicodeString& rebuiltPat,
1802  UErrorCode& ec);
1803 
// Callback type used by applyFilter(): receives a code point and an opaque context pointer
// and returns a UBool.
1808  typedef UBool (*Filter)(UChar32 codePoint, void* context);
1809 
1819  void applyFilter(Filter filter,
1820  void* context,
1821  const UnicodeSet* inclusions,
1822  UErrorCode &status);
1823 
// Convenience overload: forwards the string's buffer and length to the pointer/length
// overload declared just below.
1827  void setPattern(const UnicodeString& newPat) {
1828  setPattern(newPat.getBuffer(), newPat.length());
1829  }
1830  void setPattern(const char16_t *newPat, int32_t newPatLen);
1834  void releasePattern();
1835 
1836  friend class UnicodeSetIterator;
1837 };
1838 
1839 
1840 
// Inequality is the logical negation of the member operator==.
// The member is called explicitly rather than through the `==` expression syntax.
1841 inline bool UnicodeSet::operator!=(const UnicodeSet& o) const {
1842  return !this->operator==(o);
1843 }
1844 
// Reports whether the set is frozen: true unless both bmpSet and stringSpan are null.
1845 inline UBool UnicodeSet::isFrozen() const {
1846  return !(bmpSet == nullptr && stringSpan == nullptr);
1847 }
1848 
// Range overload: the logical negation of containsNone(start, end).
1849 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
1850  return !this->containsNone(start, end);
1851 }
1852 
// Set overload: the logical negation of containsNone(s).
1853 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
1854  return !this->containsNone(s);
1855 }
1856 
// String overload: the logical negation of containsNone(s).
1857 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
1858  return !this->containsNone(s);
1859 }
1860 
// Tests the kIsBogus bit of fFlags; the result is 1 when the bit is set and 0 otherwise.
1861 inline UBool UnicodeSet::isBogus() const {
1862  return (fFlags & kIsBogus) != 0;
1863 }
1864 
// Converts a C API USet pointer to a UnicodeSet pointer with a plain reinterpret_cast.
// This page describes USet as "the C API type corresponding to C++ class UnicodeSet".
// No check is made on the pointer.
1865 inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
1866  return reinterpret_cast<UnicodeSet *>(uset);
1867 }
1868 
// Const overload: converts a const USet pointer to a const UnicodeSet pointer with a plain
// reinterpret_cast. No check is made on the pointer.
1869 inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
1870  return reinterpret_cast<const UnicodeSet *>(uset);
1871 }
1872 
// Returns this object's address reinterpreted as a USet pointer (the C API handle type).
1873 inline USet *UnicodeSet::toUSet() {
1874  return reinterpret_cast<USet *>(this);
1875 }
1876 
// Const overload: returns this object's address reinterpreted as a const USet pointer.
1877 inline const USet *UnicodeSet::toUSet() const {
1878  return reinterpret_cast<const USet *>(this);
1879 }
1880 
// UnicodeString convenience overload of span().
// `start` is first clamped into [0, s.length()]. The char16_t* overload then runs on the
// text from `start` to the end of the string, and `start` is added to its result so the
// returned value is an index into `s`.
1881 inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
1882  const int32_t textLength=s.length();
1883  if(start<0) {
1884  start=0;
1885  } else if(textLength<start) {
1886  start=textLength;
1887  }
1888  return span(s.getBuffer()+start, textLength-start, spanCondition)+start;
1889 }
1890 
// UnicodeString convenience overload of spanBack().
// `limit` is first clamped into [0, s.length()], then passed as the length argument to the
// char16_t* overload together with the start of the string's buffer.
1891 inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
1892  const int32_t textLength=s.length();
1893  if(limit<0) {
1894  limit=0;
1895  } else if(textLength<limit) {
1896  limit=textLength;
1897  }
1898  return spanBack(s.getBuffer(), limit, spanCondition);
1899 }
1900 
1901 U_NAMESPACE_END
1902 
1903 #endif /* U_SHOW_CPLUSPLUS_API */
1904 
1905 #endif
#define INITIAL_CAPACITY
The initial size of an array if it is unspecified.
Definition: RunArrays.h:32
ParsePosition is a simple class used by Format and its subclasses to keep track of the current positi...
Definition: parsepos.h:52
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
An interface that defines both lookup protocol and parsing of symbolic names.
Definition: symtable.h:59
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:65
virtual UBool matchesIndexValue(uint8_t v) const =0
Returns true if this matcher will match a character c, where c & 0xFF == v, at offset,...
UnicodeSetIterator iterates over the contents of a UnicodeSet.
Definition: usetiter.h:67
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
U_HEADER_NESTED_NAMESPACE::USetStrings strings() const
Returns a C++ "range" for iterating over the empty and multi-character strings of this set.
Definition: uniset.h:1173
virtual UnicodeSet & removeAll(const UnicodeSet &c)
Removes from this set all of its elements that are contained in the specified set.
UnicodeSet * cloneAsThawed() const
Clone the set and make the clone mutable.
virtual UChar32 getRangeEnd(int32_t index) const
Iteration method that returns the last character in the specified range of this set.
UnicodeSet()
Constructs an empty set.
int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const
Returns the start of the trailing substring of the input string which consists only of characters and...
virtual UnicodeSet & complement(UChar32 start, UChar32 end)
Complements the specified range in this set.
UnicodeSet & remove(UChar32 c)
Removes the specified character from this set if it is present.
UnicodeSet(UChar32 start, UChar32 end)
Constructs a set containing the given range.
UnicodeSet & complementAll(const UnicodeString &s)
Complement EACH of the characters in this string.
int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const
Returns the length of the initial substring of the input string which consists only of characters and...
virtual UBool isEmpty() const
Returns true if this set contains no elements.
void setToBogus()
Make this UnicodeSet object invalid.
UnicodeSet & retain(UChar32 c)
Retains only the specified character from this set if it is present.
virtual UClassID getDynamicClassID() const override
Implement UnicodeFunctor API.
virtual int32_t size() const
Returns the number of elements in this set (its cardinality).
virtual UnicodeSet & retain(UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
virtual void addMatchSetTo(UnicodeSet &toUnionTo) const override
Implementation of UnicodeMatcher API.
UnicodeSet & retainAll(const UnicodeString &s)
Retains EACH of the characters in this string.
virtual UChar32 getRangeStart(int32_t index) const
Iteration method that returns the first character in the specified range of this set.
UnicodeSet & complement(const UnicodeString &s)
Complement the specified string in this set.
UnicodeSet * freeze()
Freeze the set (make it immutable).
UnicodeSet & addAll(const UnicodeString &s)
Adds each of the characters in this string to the set.
UnicodeSet & remove(const UnicodeString &s)
Removes the specified string from this set if it is present.
int32_t indexOf(UChar32 c) const
Returns the index of the given character within this set, where the set is ordered by ascending code ...
U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const
Returns a C++ "range" for iterating over the code points of this set.
Definition: uniset.h:1123
virtual UnicodeSet & addAll(const UnicodeSet &c)
Adds all of the elements in the specified set to this set if they're not already present.
U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const
Definition: uniset.h:1212
UnicodeSet(const UnicodeString &pattern, ParsePosition &pos, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Constructs a set from the given pattern.
virtual UnicodeString & toPattern(UnicodeString &result, UBool escapeUnprintable=false) const override
Returns a string representation of this set.
U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const
Returns a C++ iterator for iterating over all of the elements of this set.
Definition: uniset.h:1200
UnicodeSet & applyPattern(const UnicodeString &pattern, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Modifies this set to represent the set specified by the given pattern, optionally ignoring Unicode Pa...
virtual UnicodeSet & complement()
This is equivalent to complement(MIN_VALUE, MAX_VALUE).
UnicodeSet & retain(const UnicodeString &s)
Retains only the specified string from this set if it is present.
U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const
Returns a C++ "range" for iterating over the code point ranges of this set.
Definition: uniset.h:1149
virtual UBool containsAll(const UnicodeSet &c) const
Returns true if this set contains all the characters and strings of the given set.
UnicodeSet & applyPattern(const UnicodeString &pattern, UErrorCode &status)
Modifies this set to represent the set specified by the given pattern, ignoring Unicode Pattern_White...
static UnicodeSet * createFrom(const UnicodeString &s)
Makes a set from a multicharacter string.
UBool containsNone(const UnicodeString &s) const
Returns true if this set contains none of the characters of the given string.
static UnicodeSet * createFromAll(const UnicodeString &s)
Makes a set from each of the characters in the string.
virtual UnicodeSet & retainAll(const UnicodeSet &c)
Retains only the elements in this set that are contained in the specified set.
UnicodeSet(const UnicodeSet &o)
Constructs a set that is identical to the given UnicodeSet.
static UBool resemblesPattern(const UnicodeString &pattern, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
virtual ~UnicodeSet()
Destructs the set.
int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const
Returns the length of the initial substring of the input string which consists only of characters and...
UChar32 charAt(int32_t index) const
Returns the character at the given index within this set, where the set is ordered by ascending code ...
static UClassID getStaticClassID()
Return the class ID for this class.
UnicodeSet & closeOver(int32_t attribute)
Close this set over the given attribute.
UnicodeSet & applyPattern(const UnicodeString &pattern, ParsePosition &pos, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Parses the given pattern, starting at the given position.
UBool containsNone(UChar32 start, UChar32 end) const
Returns true if this set contains none of the characters of the given range.
UnicodeSet & add(UChar32 c)
Adds the specified character to this set if it is not already present.
UnicodeSet & applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode &ec)
Modifies this set to contain those code points which have the given value for the given binary or enu...
virtual UBool contains(UChar32 start, UChar32 end) const
Returns true if this set contains every character of the given range.
virtual int32_t getRangeCount() const
Iteration method that returns the number of ranges contained in this set.
UBool containsAll(const UnicodeString &s) const
Returns true if this set contains all the characters of the given string.
int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode &ec) const
Serializes this set into an array of 16-bit integers.
UnicodeSet & add(const UnicodeString &s)
Adds the specified multicharacter string to this set if it is not already present.
UBool contains(const UnicodeString &s) const
Returns true if this set contains the given multicharacter string.
virtual UnicodeSet & removeAllStrings()
Remove all strings from this set.
virtual UnicodeSet * clone() const override
Returns a copy of this object.
virtual int32_t hashCode() const
Returns the hash code value for this set.
virtual bool operator==(const UnicodeSet &o) const
Compares the specified object with this set for equality.
UnicodeSet(const UnicodeString &pattern, UErrorCode &status)
Constructs a set from the given pattern.
UBool hasStrings() const
UnicodeSet & complement(UChar32 c)
Complements the specified character in this set.
int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const
Returns the start of the trailing substring of the input string which consists only of characters and...
virtual UnicodeSet & compact()
Reallocate this object's internal structures to take up the least possible space, without changing thi...
virtual UMatchDegree matches(const Replaceable &text, int32_t &offset, int32_t limit, UBool incremental) override
Implement UnicodeMatcher::matches()
UnicodeSet & removeAll(const UnicodeString &s)
Remove EACH of the characters in this string.
virtual UnicodeSet & add(UChar32 start, UChar32 end)
Adds the specified range to this set if it is not already present.
virtual UBool contains(UChar32 c) const override
Returns true if this set contains the given character.
UnicodeSet(const uint16_t buffer[], int32_t bufferLen, ESerialization serialization, UErrorCode &status)
Constructs a set from the output of serialize().
virtual UnicodeSet & remove(UChar32 start, UChar32 end)
Removes the specified range from this set if it is present.
virtual UnicodeSet & complementAll(const UnicodeSet &c)
Complements in this set all elements contained in the specified set.
UnicodeSet(const UnicodeString &pattern, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Constructs a set from the given pattern.
UnicodeSet & applyPropertyAlias(const UnicodeString &prop, const UnicodeString &value, UErrorCode &ec)
Modifies this set to contain those code points which have the given value for the given property.
UnicodeSet & set(UChar32 start, UChar32 end)
Make this object represent the range start - end.
UnicodeSet & operator=(const UnicodeSet &o)
Assigns this object to be a copy of another.
virtual UnicodeSet & clear()
Removes all of the elements from this set.
UBool containsNone(const UnicodeSet &c) const
Returns true if this set contains none of the characters and strings of the given set.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
int32_t length() const
Return the length of the UnicodeString object.
Definition: unistr.h:4214
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
UMatchDegree
Constants returned by UnicodeMatcher::matches() indicating the degree of match.
Definition: unimatch.h:33
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:346
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:196
C API: This file defines an abstract map from Unicode code points to integer values.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
C++ API: Unicode Filter.
C++ API: Unicode String.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API: Unicode Set.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:185
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:53
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:315