[kaffe] CVS kaffe (robilad): Resynced with GNU Classpath: character conversion fixes

Kaffe CVS cvs-commits at kaffe.org
Sat Feb 5 12:59:05 PST 2005


PatchSet 5980 
Date: 2005/02/05 20:50:01
Author: robilad
Branch: HEAD
Tag: (none) 
Log:
Resynced with GNU Classpath: character conversion fixes

2005-02-05  Dalibor Topic  <robilad at kaffe.org>

        Resynced with GNU Classpath.

        2005-02-03  Robert Schuster  <thebohemian at gmx.net>

        * gnu/java/nio/charset/ISO_8859_1.java,
        gnu/java/nio/charset/US_ASCII.java,
        gnu/java/nio/charset/UTF_16.java,
        gnu/java/nio/charset/UTF_16LE.java,
        gnu/java/nio/charset/UTF_16BE.java,
        gnu/java/nio/charset/UTF_8.java: Fixed canonical names
         and aliases according to
         http://www.iana.org/assignments/character-sets,
         http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
         and http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL.
        * gnu/java/nio/charset/Provider.java: Made charset lookup
         case-insensitive which fixes bug #11740.

Members: 
	ChangeLog:1.3518->1.3519 
	libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.2->1.3 
	libraries/javalib/gnu/java/nio/charset/Provider.java:1.1->1.2 
	libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.2->1.3 
	libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.4->1.5 
	libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.4->1.5 
	libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.4->1.5 
	libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.2->1.3 

Index: kaffe/ChangeLog
diff -u kaffe/ChangeLog:1.3518 kaffe/ChangeLog:1.3519
--- kaffe/ChangeLog:1.3518	Sat Feb  5 20:36:22 2005
+++ kaffe/ChangeLog	Sat Feb  5 20:50:01 2005
@@ -1,3 +1,22 @@
+2005-02-05  Dalibor Topic  <robilad at kaffe.org>
+
+	Resynced with GNU Classpath.
+
+	2005-02-03  Robert Schuster  <thebohemian at gmx.net>
+
+        * gnu/java/nio/charset/ISO_8859_1.java,
+        gnu/java/nio/charset/US_ASCII.java,
+        gnu/java/nio/charset/UTF_16.java,
+        gnu/java/nio/charset/UTF_16LE.java,
+        gnu/java/nio/charset/UTF_16BE.java,
+        gnu/java/nio/charset/UTF_8.java: Fixed canonical names
+         and aliases according to
+         "http://www.iana.org/assignments/character-sets",
+         "http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html"
+         and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL".
+        * gnu/java/nio/charset/Provider.java: Made charset lookup
+         case-insensitive which fixes bug #11740.
+
 2005-02-05  Sven de Marothy  <sven at physto.se>
 
 	* java/text/SimpleDateFormat.java
Index: kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.2 kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.3
--- kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java:1.2	Mon Nov  8 10:47:13 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/ISO_8859_1.java	Sat Feb  5 20:50:05 2005
@@ -1,5 +1,5 @@
 /* ISO_8859_1.java -- 
-   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -53,7 +53,28 @@
 {
   ISO_8859_1 ()
   {
-    super ("ISO-8859-1", new String[]{"ISO-LATIN-1"});
+    /* Canonical charset name chosen according to:
+     * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+     */
+    super ("ISO-8859-1", new String[] {
+        /* These names are provided by 
+         * http://www.iana.org/assignments/character-sets
+         */
+        "iso-ir-100",
+        "ISO_8859-1",
+        "latin1",
+        "l1",
+        "IBM819",
+        "CP819",
+        "csISOLatin1",
+        "8859_1",
+        /* These names are provided by
+         * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+         */
+        "ISO8859_1", "ISO_8859_1", "ibm-819", "ISO_8859-1:1987",
+        "819"
+        });
+
   }
 
   public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java:1.1 kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java:1.2
--- kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java:1.1	Thu Nov 28 13:39:26 2002
+++ kaffe/libraries/javalib/gnu/java/nio/charset/Provider.java	Sat Feb  5 20:50:05 2005
@@ -1,5 +1,5 @@
 /* Provider.java -- 
-   Copyright (C) 2002 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -48,6 +48,7 @@
  * {@link Charset#charsetForName} and * {@link Charset#availableCharsets}.
  *
  * @author Jesse Rosenstock
+ * @author Robert Schuster (thebohemian at gmx.net)
  * @see Charset
  */
 public final class Provider extends CharsetProvider
@@ -63,12 +64,14 @@
   }
 
   /**
-   * Map from charset name to charset canonical name.
+   * Map from charset name to charset canonical name. The strings
+   * are all lower-case to allow case-insensitive retrieval of
+   * Charset instances. 
    */
   private final HashMap canonicalNames;
 
   /**
-   * Map from canonical name to Charset.
+   * Map from lower-case canonical name to Charset.
    * TODO: We may want to use soft references.  We would then need to keep
    * track of the class name to regenerate the object.
    */
@@ -76,8 +79,6 @@
 
   private Provider ()
   {
-    // FIXME: We might need to make the name comparison case insensitive.
-    // Verify this with the Sun JDK.
     canonicalNames = new HashMap ();
     charsets = new HashMap ();
 
@@ -106,24 +107,42 @@
                       .iterator ();
   }
 
+  /**
+   * Returns a Charset instance by converting the given
+   * name to lower-case, looking up the canonical charset
+   * name and finally looking up the Charset with that name.
+   * 
+   * <p>The lookup is therefore case-insensitive.</p>
+   * 
+   * @return The Charset having <code>charsetName</code>
+   * as its alias, or null if no such Charset exists.
+   */
   public Charset charsetForName (String charsetName)
   {
-    return (Charset) charsets.get (canonicalize (charsetName));
-  }
-
-  private Object canonicalize (String charsetName)
-  {
-    Object o = canonicalNames.get (charsetName);
-    return o == null ? charsetName : o;
+    return (Charset) charsets.get(canonicalNames.get(charsetName.toLowerCase()));
   }
 
+  /**
+   * Puts a Charset under its canonical name into the 'charsets' map.
+   * Then puts a mapping from all its alias names to the canonical name.
+   * 
+   * <p>All names are converted to lower-case.</p>
+   * 
+   * @param cs the Charset to register
+   */
   private void addCharset (Charset cs)
   {
-    String canonicalName = cs.name ();
+    String canonicalName = cs.name().toLowerCase();
     charsets.put (canonicalName, cs);
+    
+    /* Also map the canonical name to itself, so that
+     * a lookup by canonical name needs no special
+     * handling.
+     */  
+    canonicalNames.put(canonicalName, canonicalName);
 
     for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
-      canonicalNames.put (i.next (), canonicalName);
+      canonicalNames.put (((String) i.next()).toLowerCase(), canonicalName);
   }
 
   public static synchronized Provider provider ()
Index: kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.2 kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.3
--- kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java:1.2	Mon Nov  8 10:47:13 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/US_ASCII.java	Sat Feb  5 20:50:05 2005
@@ -1,5 +1,5 @@
 /* US_ASCII.java -- 
-   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -53,7 +53,29 @@
 {
   US_ASCII ()
   {
-    super ("US-ASCII", new String[]{"ISO646-US"});
+    /* Canonical charset name chosen according to:
+     * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+     */
+    super ("US-ASCII", new String[] {
+        /* These names are provided by 
+         * http://www.iana.org/assignments/character-sets
+         */
+        "iso-ir-6",
+        "ANSI_X3.4-1986",
+        "ISO_646.irv:1991",
+        "ASCII",
+        "ISO646-US",
+        "ASCII",
+        "us",
+        "IBM367",
+        "cp367",
+        "csASCII",
+        /* These names are provided by
+         * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+         */
+        "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646",
+        "windows-20127"
+        });
   }
 
   public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.4 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.5
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java:1.4	Fri Oct 15 10:41:44 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16.java	Sat Feb  5 20:50:05 2005
@@ -1,5 +1,5 @@
 /* UTF_16.java -- 
-   Copyright (C) 2002, 2004  Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -51,7 +51,14 @@
 {
   UTF_16 ()
   {
-    super ("UTF-16", null);
+    super ("UTF-16", new String[] {
+        // witnessed by the internet
+        "UTF16",
+        /* These names are provided by
+         * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+         */
+        "ISO-10646-UCS-2", "unicode", "csUnicode", "ucs-2"
+    });
   }
 
   public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.4 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.5
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java:1.4	Fri Oct 15 10:41:44 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16BE.java	Sat Feb  5 20:50:05 2005
@@ -1,5 +1,5 @@
 /* UTF_16BE.java -- 
-   Copyright (C) 2002, 2004  Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -51,7 +51,18 @@
 {
   UTF_16BE ()
   {
-    super ("UTF-16BE", null);
+    super ("UTF-16BE",  new String[] {
+        // witnessed by the internet
+        "UTF16BE",
+        /* These names are provided by
+         * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+         */
+        "x-utf-16be", "ibm-1200", "ibm-1201", "ibm-5297",
+        "ibm-13488", "ibm-17584", "windows-1201", "cp1200", "cp1201",
+        "UTF16_BigEndian",
+        // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+        "UnicodeBigUnmarked"
+    });
   }
 
   public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.4 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.5
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java:1.4	Fri Oct 15 10:41:44 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_16LE.java	Sat Feb  5 20:50:05 2005
@@ -1,5 +1,5 @@
 /* UTF_16LE.java -- 
-   Copyright (C) 2002, 2004  Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -51,7 +51,17 @@
 {
   UTF_16LE ()
   {
-    super ("UTF-16LE", null);
+    super ("UTF-16LE", new String[] {
+        // witnessed by the internet
+        "UTF16LE", 
+        /* These names are provided by
+         * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+         */
+        "x-utf-16le", "ibm-1202", "ibm-13490", "ibm-17586",
+        "UTF16_LittleEndian",
+        // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+        "UnicodeLittleUnmarked"
+    });
   }
 
   public boolean contains (Charset cs)
Index: kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java
diff -u kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.2 kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.3
--- kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java:1.2	Mon Nov  8 10:47:13 2004
+++ kaffe/libraries/javalib/gnu/java/nio/charset/UTF_8.java	Sat Feb  5 20:50:05 2005
@@ -1,5 +1,5 @@
 /* UTF_8.java -- 
-   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004, 2005  Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -62,7 +62,15 @@
 {
   UTF_8 ()
   {
-    super ("UTF-8", null);
+    super ("UTF-8", new String[] {
+        /* These names are provided by
+         * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL
+         */
+        "ibm-1208", "ibm-1209", "ibm-5304", "ibm-5305",
+        "windows-65001", "cp1208",
+        // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html
+        "UTF8"
+    });
   }
 
   public boolean contains (Charset cs)



More information about the kaffe mailing list