Index: java/lang/Character.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/lang/Character.java,v
retrieving revision 1.36
diff -u -r1.36 Character.java
--- java/lang/Character.java 2 Jul 2005 20:32:38 -0000 1.36
+++ java/lang/Character.java 17 Aug 2005 14:05:50 -0000
@@ -1457,6 +1457,57 @@
   private static final int MIRROR_MASK = 0x40;
 
   /**
+   * Min value for supplementary code point.
+   *
+   * @since 1.5
+   */
+  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
+
+  /**
+   * Min value for code point.
+   *
+   * @since 1.5
+   */
+  public static final int MIN_CODE_POINT = 0;
+
+
+  /**
+   * Max value for code point.
+   *
+   * @since 1.5
+   */
+  public static final int MAX_CODE_POINT = 0x10ffff;
+
+
+  /**
+   * Minimum high surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MIN_HIGH_SURROGATE = '\ud800';
+
+  /**
+   * Maximum high surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MAX_HIGH_SURROGATE = '\udbff';
+
+  /**
+   * Minimum low surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MIN_LOW_SURROGATE = '\udc00';
+
+  /**
+   * Maximum low surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MAX_LOW_SURROGATE = '\udfff';
+
+  /**
    * Grabs an attribute offset from the Unicode attribute database. The lower
    * 5 bits are the character type, the next 2 bits are flags, and the top
    * 9 bits are the offset into the attribute tables.
@@ -2249,5 +2300,119 @@
   public int compareTo(Object o)
   {
     return compareTo((Character) o);
+  }
+
+  /**
+   * Converts a Unicode code point to a UTF-16 representation of that
+   * code point.
+   *
+   * @param codePoint the Unicode code point
+   *
+   * @return the UTF-16 representation of that code point
+   *
+   * @throws IllegalArgumentException if the code point is not a valid
+   *         Unicode code point
+   *
+   * @since 1.5
+   */
+  public static char[] toChars(int codePoint)
+  {
+    char[] result = new char[charCount(codePoint)];
+    toChars(codePoint, result, 0);
+    return result;
+  }
+
+  /**
+   * Converts a Unicode code point to its UTF-16 representation.
+   *
+   * @param codePoint the Unicode code point
+   * @param dst the target char array
+   * @param dstIndex the start index for the target
+   *
+   * @return number of characters written to dst
+   *
+   * @throws IllegalArgumentException if codePoint is not a
+   *         valid Unicode code point
+   * @throws NullPointerException if dst is null
+   * @throws IndexOutOfBoundsException if dstIndex is not valid
+   *         in dst or if the UTF-16 representation does not
+   *         fit into dst
+   *
+   * @since 1.5
+   */
+  public static int toChars(int codePoint, char[] dst, int dstIndex)
+  {
+    if (!isValidCodePoint(codePoint))
+      {
+        throw new IllegalArgumentException("not a valid code point: "
+                                           + codePoint);
+      }
+
+    int result;
+    if (isSupplementaryCodePoint(codePoint))
+      {
+        // Subtract the 0x10000 offset before taking the top ten bits;
+        // the low char is written first so a short dst fails up front.
+        dst[dstIndex + 1] = (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
+        dst[dstIndex] = (char) (((codePoint - MIN_SUPPLEMENTARY_CODE_POINT)
+                                 >> 10) + MIN_HIGH_SURROGATE);
+        result = 2;
+      }
+    else
+      {
+        dst[dstIndex] = (char) codePoint;
+        result = 1;
+      }
+    return result;
+  }
+
+  /**
+   * Return number of 16-bit characters required to represent the given
+   * code point.
+   *
+   * @param codePoint a Unicode code point
+   *
+   * @return 2 if codePoint >= 0x10000, 1 otherwise.
+   *
+   * @since 1.5
+   */
+  public static int charCount(int codePoint)
+  {
+    return
+      (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT)
+      ? 2
+      : 1;
+  }
+
+  /**
+   * Determines whether the specified code point is
+   * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
+   * supplementary character range.
+   *
+   * @param codePoint a Unicode code point
+   *
+   * @return true if code point is in supplementary range
+   *
+   * @since 1.5
+   */
+  public static boolean isSupplementaryCodePoint(int codePoint)
+  {
+    return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
+      && codePoint <= MAX_CODE_POINT;
+  }
+
+  /**
+   * Determines whether the specified code point is
+   * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
+   *
+   * @param codePoint a Unicode code point
+   *
+   * @return true if code point is valid
+   *
+   * @since 1.5
+   */
+  public static boolean isValidCodePoint(int codePoint)
+  {
+    return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
   }
 } // class Character