1
2
3
4
5
6
7
8
9
10 package net.sf.mmapps.commons.util;
11
12 import java.text.Collator;
13 import java.util.*;
14
15
/**
 * Static helpers for cleaning up text whose characters were produced by, or must be
 * reduced to, a narrower character set: stripping accents from Latin letters,
 * replacing windows-1252 specific characters, repairing strings that were decoded
 * with the wrong charset, and accent-aware comparison.
 */
public final class EncodingUtil {

    /** Not instantiable: every member is static. */
    private EncodingUtil() {
    }

    /**
     * ASCII replacements. The character at index i replaces the character at index i
     * of {@link #UNICODE}; the two tables must therefore always have the same length.
     */
    private static final String PLAIN_ASCII =
          "AaEeIiOoUu"      // grave
        + "AaEeIiOoUuYy"    // acute
        + "AaEeIiOoUuYy"    // circumflex
        + "AaEeIiOoUuYy"    // tilde
        + "AaEeIiOoUuYy"    // diaeresis
        + "Aa"              // ring
        + "Cc"              // cedilla
        + "Nn"              // n with tilde
        ;

    /** Accented characters, index-aligned with {@link #PLAIN_ASCII}. */
    private static final String UNICODE =
          "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9"
        + "\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD"
        + "\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177"
        // This row used to repeat the circumflex row, which made it dead data
        // (indexOf always matched the first copy) and left tilde letters unconverted.
        + "\u00C3\u00E3\u1EBC\u1EBD\u0128\u0129\u00D5\u00F5\u0168\u0169\u1EF8\u1EF9"
        + "\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF"
        + "\u00C5\u00E5"
        + "\u00C7\u00E7"
        + "\u00D1\u00F1"
        ;

    /**
     * Replaces accented Latin letters by their unaccented ASCII base letter.
     * Only the characters listed in the internal table are converted; every other
     * character, including other non-ASCII characters, is copied unchanged.
     *
     * @param s text to convert, may be <code>null</code>
     * @return the converted text, or <code>null</code> when <code>s</code> is <code>null</code>
     */
    public static String convertNonAscii(String s) {
        if (s == null) {
            return null;
        }
        int n = s.length();
        StringBuilder sb = new StringBuilder(n);
        for (int i = 0; i < n; i++) {
            char c = s.charAt(i);
            int pos = UNICODE.indexOf(c);
            sb.append(pos >= 0 ? PLAIN_ASCII.charAt(pos) : c);
        }
        return sb.toString();
    }

    /**
     * Replacement text for characters in the range U+0080..U+009F.
     * ISO-8859-1 only has non-printable control codes in that range, whereas
     * windows-1252 (the Windows Western European code page) stores printable
     * characters there. Finding such a character in an ISO-8859-1 decoded string
     * therefore indicates windows-1252 input, and some of those characters can be
     * replaced by a plain-text equivalent.
     */
    static final Map<Character,String> w1252ToISO = new HashMap<Character,String>();
    static {
        w1252ToISO.put(Character.valueOf('\u0080'), "Euro");
        w1252ToISO.put(Character.valueOf('\u0082'), ",");
        w1252ToISO.put(Character.valueOf('\u0083'), "f");
        w1252ToISO.put(Character.valueOf('\u0085'), "...");
        w1252ToISO.put(Character.valueOf('\u0088'), "^");
        w1252ToISO.put(Character.valueOf('\u008B'), "<");
        w1252ToISO.put(Character.valueOf('\u008C'), "OE");
        w1252ToISO.put(Character.valueOf('\u0091'), "'");
        w1252ToISO.put(Character.valueOf('\u0092'), "'");
        w1252ToISO.put(Character.valueOf('\u0093'), "\"");
        w1252ToISO.put(Character.valueOf('\u0094'), "\"");
        w1252ToISO.put(Character.valueOf('\u0095'), ".");
        w1252ToISO.put(Character.valueOf('\u0096'), "-");
        w1252ToISO.put(Character.valueOf('\u0097'), "-");
        w1252ToISO.put(Character.valueOf('\u0098'), "~");
        w1252ToISO.put(Character.valueOf('\u009B'), ">");
        w1252ToISO.put(Character.valueOf('\u009C'), "oe");
    }

    /**
     * Replaces the windows-1252 specific characters (code points 128..159) of a string
     * by a plain-text equivalent. A character in that range that has no entry in the
     * replacement table is removed from the result; all other characters are copied
     * unchanged.
     *
     * @param windows1252encoded text that may contain windows-1252 specific characters,
     *        may be <code>null</code>
     * @return the cleaned text, or <code>null</code> when the argument is <code>null</code>
     */
    public static String windows1252ToISO(String windows1252encoded) {
        if (windows1252encoded == null) {
            return null;
        }
        int length = windows1252encoded.length();
        StringBuilder transcoded = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char current = windows1252encoded.charAt(i);
            if (current < 128 || current > 159) {
                transcoded.append(current);
                continue;
            }
            String replacement = w1252ToISO.get(Character.valueOf(current));
            if (replacement != null) {
                transcoded.append(replacement);
            }
            // No replacement known: the character is dropped on purpose.
        }
        return transcoded.toString();
    }

    /**
     * Repairs a string whose UTF-8 bytes were decoded as ISO-8859-1, so that every
     * byte of the UTF-8 form shows up as one separate character.
     *
     * @param s wrongly decoded text, may be <code>null</code>
     * @return the text decoded as UTF-8, or <code>null</code> when <code>s</code> is <code>null</code>
     * @throws IllegalStateException if the runtime lacks one of the two charsets
     *         (both are mandatory on every Java platform)
     */
    public static String convertFromUTF8(String s) {
        return transcode(s, "ISO-8859-1", "UTF-8");
    }

    /**
     * Inverse of {@link #convertFromUTF8(String)}: encodes the string as UTF-8 and
     * returns a string that holds one character per resulting byte, suitable for
     * output through an ISO-8859-1 writer.
     * <p>
     * The bytes are decoded explicitly as ISO-8859-1. Decoding with the platform
     * default charset made the result differ between machines and turned the method
     * into a no-op wherever the default charset is UTF-8.
     *
     * @param s text to convert, may be <code>null</code>
     * @return one character per UTF-8 byte, or <code>null</code> when <code>s</code> is <code>null</code>
     * @throws IllegalStateException if the runtime lacks one of the two charsets
     *         (both are mandatory on every Java platform)
     */
    public static String convertToUTF8(String s) {
        return transcode(s, "UTF-8", "ISO-8859-1");
    }

    /** Encodes text with one charset and decodes the resulting bytes with another. */
    private static String transcode(String text, String encodeWith, String decodeWith) {
        if (text == null) {
            return null;
        }
        try {
            return new String(text.getBytes(encodeWith), decodeWith);
        }
        catch (java.io.UnsupportedEncodingException e) {
            throw new IllegalStateException(
                    "Charset not supported: " + encodeWith + " or " + decodeWith, e);
        }
    }

    /**
     * Compares two strings with the collation rules of a locale. Base letters and
     * accents are significant, differences in case are ignored.
     *
     * @param s1 first string
     * @param s2 second string
     * @param locale locale that supplies the collation rules
     * @return a negative value, zero or a positive value when <code>s1</code> sorts
     *         before, equal to or after <code>s2</code>
     */
    public static int compareAccentuatedCharacters(String s1, String s2, Locale locale) {
        return accentCollator(locale).compare(s1, s2);
    }

    /**
     * Tells whether two strings are equal under the collation rules of a locale.
     * Base letters and accents are significant, differences in case are ignored.
     *
     * @param s1 first string
     * @param s2 second string
     * @param locale locale that supplies the collation rules
     * @return <code>true</code> when the strings differ at most in case
     */
    public static boolean equalAccentuatedCharacters(String s1, String s2, Locale locale) {
        return accentCollator(locale).compare(s1, s2) == 0;
    }

    /**
     * Creates the collator shared by the comparison methods.
     * CANONICAL_DECOMPOSITION is a decomposition mode, so it is applied with
     * setDecomposition; passing it to setStrength only selected SECONDARY by
     * accident because both constants have the same numeric value.
     */
    private static Collator accentCollator(Locale locale) {
        Collator collator = Collator.getInstance(locale);
        collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
        collator.setStrength(Collator.SECONDARY);
        return collator;
    }

}