droy | ef2077b | 2008-07-21 14:51:52 +0000 | [diff] [blame] | 1 | #!/bin/sh |
| 2 | #/******************************************************************************* |
| 3 | # * Copyright (c) 2008 Eclipse Foundation. |
| 4 | # * All rights reserved. This program and the accompanying materials |
| 5 | # * are made available under the terms of the Eclipse Public License v1.0 |
| 6 | # * which accompanies this distribution, and is available at |
| 7 | # * http://www.eclipse.org/legal/epl-v10.html |
| 8 | # * |
| 9 | # * Contributors: |
| 10 | # * Eclipse Foundation - Initial API and implementation |
| 11 | #*******************************************************************************/ |
| 12 | |
| 13 | # This script converts files in various character encodings (such as SHIFT_JIS, EUC_KR, GB2312, CP1252) |
| 14 | # to UTF-8 for importing with import_translation_zip.php |
| 15 | |
| 16 | # This script is not a complete, turnkey solution. You need to examine the |
| 17 | # files, determine which encodings are used and convert them accordingly. |
| 18 | |
# convert_tree DIR FROM_ENCODING FILE_MARKER
#
# Re-encodes to UTF-8, in place, every regular file under DIR whose file(1)
# description contains the literal string FILE_MARKER. The current contents
# of each matching file are interpreted as FROM_ENCODING.
#
# Arguments:
#   $1  directory to scan recursively (e.g. "de/" or ".")
#   $2  source encoding name passed to `iconv -f`
#   $3  substring of the file(1) description that marks a file as needing
#       conversion (e.g. "ISO-8859" or "Non-ISO extended-ASCII")
# Outputs:
#   "Need to convert <path>" on stdout for every file it attempts;
#   warnings on stderr for a missing directory or a failed conversion.
#
# A file is only overwritten after iconv has succeeded, so a file that is not
# valid in FROM_ENCODING is left untouched instead of being replaced by
# truncated output.
#
# Limitations: paths containing a newline are not supported (paths are read
# line by line); a file that cannot be opened for writing is reported and
# left unchanged.
#
# The body is a subshell "( ... )" so that its variables and its EXIT trap
# stay private under plain POSIX sh, which has no `local`.
convert_tree() (
  tree=$1
  source_encoding=$2
  marker=$3

  if [ ! -d "$tree" ]; then
    printf 'Skipping %s: not a directory\n' "$tree" >&2
    exit 0   # leaves only this function's subshell
  fi

  # One scratch file, created outside the tree being scanned, receives the
  # converted text. Nothing is ever created next to the originals, so no
  # sibling file can be clobbered and find never sees a temporary file.
  scratch=$(mktemp) || exit 1
  trap 'rm -f "$scratch"' EXIT

  find "$tree" -type f | while IFS= read -r path; do
    # -b suppresses the "name: " prefix so that only the description is
    # matched, never the file's own name.
    if ! file -b "$path" | grep -qF -- "$marker"; then
      continue
    fi

    printf 'Need to convert %s\n' "$path"

    if ! iconv -f "$source_encoding" -t UTF-8 "$path" >"$scratch"; then
      printf 'iconv failed for %s; leaving it unchanged\n' "$path" >&2
      continue
    fi

    # Overwrite the contents rather than renaming a new file over the old
    # one: this keeps the original's mode and ownership.
    if ! cat "$scratch" >"$path"; then
      printf 'Could not write %s; leaving it unchanged\n' "$path" >&2
    fi
  done

  rm -f "$scratch"
)

# Language-specific passes. The encoding assigned to each directory is a
# guess that must be checked against the actual files (see the header note:
# this script is not a turnkey solution).
echo "Doing German... "
convert_tree de/ CP1252 "Non-ISO extended-ASCII"

echo "Doing Korean... "
convert_tree ko/ EUC-KR "ISO-8859"

echo "Doing Japanese... "
# Typical file(1) output for these files:
# Non-ISO extended-ASCII text, with CRLF, NEL line terminators
convert_tree ja/ SHIFT-JIS "Non-ISO extended-ASCII"

echo "Doing Chinese... "
convert_tree zh/ GB2312 "ISO-8859"

# Catch-all pass over the whole tree: anything file(1) still reports as
# ISO-8859 at this point is treated as Latin-1.
echo "Doing all others..."
convert_tree . ISO-8859-1 "ISO-8859"