最代碼官方的gravatar頭像
最代碼官方2018-03-21 11:34:44
java腳本批量轉換java utf-8 bom源碼文件為utf-8編碼文件

今天在最代碼下載了一個項目「基于ssm+easyui開發的公司員工后臺管理系統」,導入IntelliJ IDEA并設置為java web項目后,編譯時提示如下錯誤:

Error:(1, 1) java: 非法字符: \65279
Error:(1, 10) java: 需要class, interface或enum

java腳本批量轉換java utf-8 bom源碼文件為utf-8編碼文件

于是搜索一番發現是因為該java源文件編碼是utf-8 bom文件,需要設置為utf-8 無bom文件,于是通過notepad++替換了2個java源碼文件

java腳本批量轉換java utf-8 bom源碼文件為utf-8編碼文件

但是項目里有幾十個java源碼文件,這樣一個個替換實在太費勁,于是想到批量轉換。暫時沒發現notepad++有此類插件;百度上發現editplus可以實現,可以參考 https://jingyan.baidu.com/article/dca1fa6f4cea7cf1a5405210.html 。另外,通過java語言也可以實現該功能,代碼如下:

package com.javaniu.core.util;

import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * - changed BOM recognition ordering (longer boms first)
 * 網絡地址:http://koti.mbnet.fi/akini/java/unicodereader/UnicodeReader.java.txt
 * Original pseudocode   : Thomas Weidenfeller
 * Implementation tweaked: Aki Nieminen
 * http://www.unicode.org/unicode/faq/utf_bom.html
 * BOMs:
 * 00 00 FE FF    = UTF-32, big-endian
 * FF FE 00 00    = UTF-32, little-endian
 * EF BB BF       = UTF-8,
 * FE FF          = UTF-16, big-endian
 * FF FE          = UTF-16, little-endian
 * Win2k Notepad:
 * Unicode format = UTF-16LE
 ***/

/**
 * Reader that inspects the first bytes of a stream for a Unicode byte-order
 * mark (BOM), skips the BOM if one is present and decodes the remaining bytes
 * with the encoding the BOM indicates. When no BOM is found the supplied
 * default encoding is used, or the platform default if that is {@code null}.
 *
 * <p>The class also contains a small command-line tool ({@link #main}) that
 * rewrites every file below a directory whose name ends with a given suffix,
 * dropping any BOM in the process.
 */
public class UTF8BOMConverter extends Reader {
    PushbackInputStream internalIn;
    InputStreamReader internalIn2 = null;
    String defaultEnc;
    private static final int BOM_SIZE = 4;

    /**
     * @param in         input stream to be read
     * @param defaultEnc encoding to use when the stream has no BOM;
     *                   pass {@code null} to use the platform default
     */
    UTF8BOMConverter(InputStream in, String defaultEnc) {
        internalIn = new PushbackInputStream(in, BOM_SIZE);
        this.defaultEnc = defaultEnc;
    }

    /**
     * @return the default encoding given to the constructor (may be {@code null})
     */
    public String getDefaultEncoding() {
        return defaultEnc;
    }

    /**
     * Returns the encoding reported by the underlying {@link InputStreamReader},
     * or {@code null} if the stream has not been initialized yet. Call
     * {@link #init()} or {@code read} to initialize it.
     */
    public String getEncoding() {
        if (internalIn2 == null) {
            return null;
        }
        return internalIn2.getEncoding();
    }

    /**
     * Reads ahead up to four bytes and checks them for a BOM. Only the BOM
     * bytes are consumed; every other byte that was read ahead is pushed back
     * onto the stream. Does nothing if the reader is already initialized.
     *
     * @throws IOException if reading fails or the chosen encoding is unsupported
     */
    protected void init() throws IOException {
        if (internalIn2 != null) {
            return;
        }

        // Fill the look-ahead buffer; a single read() may legally return fewer
        // bytes than requested even when more are available.
        byte[] bom = new byte[BOM_SIZE];
        int n = 0;
        while (n < bom.length) {
            int got = internalIn.read(bom, n, bom.length - n);
            if (got < 0) {
                break;
            }
            n += got;
        }

        // Longer BOMs are tested first because FF FE is a prefix of FF FE 00 00.
        // hasPrefix() only looks at bytes that were really read, so the
        // zero-filled tail of a short stream can never complete a pattern.
        String encoding;
        int bomLength;
        if (hasPrefix(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (hasPrefix(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (hasPrefix(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (hasPrefix(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (hasPrefix(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        } else {
            // No BOM found: keep every byte and fall back to the default encoding.
            encoding = defaultEnc;
            bomLength = 0;
        }

        int unread = n - bomLength;
        if (unread > 0) {
            internalIn.unread(bom, bomLength, unread);
        }

        if (encoding == null) {
            internalIn2 = new InputStreamReader(internalIn);
        } else {
            internalIn2 = new InputStreamReader(internalIn, encoding);
        }
    }

    /**
     * Tells whether the first {@code available} bytes of {@code buf} start
     * with the given byte values.
     */
    private static boolean hasPrefix(byte[] buf, int available, int... prefix) {
        if (available < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (buf[i] != (byte) prefix[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Closes the reader and the wrapped stream. If nothing has been read yet
     * the wrapped stream is closed directly, without looking for a BOM first.
     */
    @Override
    public void close() throws IOException {
        if (internalIn2 != null) {
            internalIn2.close();
        } else {
            internalIn.close();
        }
    }

    /**
     * Initializes the reader on first use and then delegates to the
     * underlying {@link InputStreamReader}.
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        init();
        return internalIn2.read(cbuf, off, len);
    }


    /**
     * Reads {@code filePath} with {@code readEncoding} (BOM-aware) and writes
     * the text back to the same path using {@code saveEncoding}.
     */
    private static void readContentAndSaveWithEncoding(String filePath, String readEncoding, String saveEncoding) throws Exception {
        saveContent(filePath, readContent(filePath, readEncoding), saveEncoding);
    }

    /**
     * Overwrites {@code filePath} with {@code content} encoded as {@code encoding}.
     * The text is encoded before the file is opened, so an unsupported
     * encoding name fails without truncating the existing file.
     */
    private static void saveContent(String filePath, String content, String encoding) throws Exception {
        byte[] encoded = content.getBytes(encoding);
        FileOutputStream fos = new FileOutputStream(filePath);
        try {
            fos.write(encoded);
            fos.flush();
        } finally {
            fos.close();
        }
    }

    /**
     * Returns the text of {@code filePath}, decoded BOM-aware with
     * {@code encoding} as the fallback. Every line is terminated with
     * {@code "\r\n"}, including the last one, whatever line endings the file
     * used originally.
     */
    private static String readContent(String filePath, String encoding) throws Exception {
        BufferedReader br = new BufferedReader(
                new UTF8BOMConverter(new FileInputStream(new File(filePath)), encoding));
        try {
            StringBuilder fileContent = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                fileContent.append(line).append("\r\n");
            }
            return fileContent.toString();
        } finally {
            br.close();
        }
    }

    /**
     * Reads {@code filePath} as UTF-8 and returns its lines in order.
     */
    private static List<String> getPerlineFileName(String filePath) throws Exception {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(filePath)), "UTF-8"));
        try {
            List<String> list = new ArrayList<String>();
            String line;
            while ((line = br.readLine()) != null) {
                list.add(line);
            }
            return list;
        } finally {
            br.close();
        }
    }


    /**
     * Recursively collects the paths of all files and directories below
     * {@code filePath} into {@code filePaths} and returns that list. If
     * {@code listFiles()} yields {@code null} for a location, nothing is
     * added for it.
     */
    private static List<String> getAllFilePaths(File filePath, List<String> filePaths) {
        File[] files = filePath.listFiles();
        if (files == null) {
            return filePaths;
        }
        for (File f : files) {
            filePaths.add(f.getPath());
            if (f.isDirectory()) {
                getAllFilePaths(f, filePaths);
            }
        }
        return filePaths;
    }

    /**
     * Returns {@code args[index]} if present, otherwise {@code fallback}.
     */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args != null && args.length > index) {
            return args[index];
        }
        return fallback;
    }

    /**
     * Rewrites every matching file below a directory without a BOM.
     *
     * <p>Optional arguments, each falling back to the previous hard-coded value:
     * <ol>
     *   <li>root directory (default {@code D:/zuidaima_idea/employee/})</li>
     *   <li>file name suffix (default {@code .java})</li>
     *   <li>encoding used for both reading and writing (default {@code UTF-8})</li>
     * </ol>
     */
    public static void main(String[] args) throws Exception {
        String root = argOrDefault(args, 0, "D:/zuidaima_idea/employee/");
        String suffix = argOrDefault(args, 1, ".java");
        // Note: for GBK-encoded files the encoding must be GBK for both reading
        // and writing, otherwise the files end up garbled beyond recovery.
        String encoding = argOrDefault(args, 2, "UTF-8");

        List<String> paths = getAllFilePaths(new File(root), new ArrayList<String>());

        List<String> pathList = new ArrayList<String>();
        for (String path : paths) {
            // The listing also contains directories; a directory whose name
            // ends with the suffix must not be opened as a file.
            if (path.endsWith(suffix) && new File(path).isFile()) {
                pathList.add(path);
            }
        }

        for (String path : pathList) {
            readContentAndSaveWithEncoding(path, encoding, encoding);
            System.out.println(path + "轉換成功");
        }
    }
}

可以指定文件夾和后綴,這樣程序就可以批量查找到某個文件夾下某種后綴的文件進行編碼轉換了,執行結果如下圖

java腳本批量轉換java utf-8 bom源碼文件為utf-8編碼文件

該項目已經可以在idea下正常編譯和運行了

java腳本批量轉換java utf-8 bom源碼文件為utf-8編碼文件

 

也可以通過設置Java Compiler為Eclipse來解決

java腳本批量轉換java utf-8 bom源碼文件為utf-8編碼文件

 

新版本idea在項目上點擊右鍵有Remove BOM的菜單,點擊也可以實現刪除bom符號的目的

java腳本批量轉換java utf-8 bom源碼文件為utf-8編碼文件


打賞

已有1人打賞

程序猿全敏的gravatar頭像
最近瀏覽
social LV29月10日
星星星星
木雨然 LV79月4日
月亮星星星星星星
藕粉炸雞 LV28月9日
星星星星
ouzhizi LV27月15日
星星星星
1234567891011 LV37月9日
星星星星星星
命運的彼岸花 LV27月3日
星星星星
17391234960 LV96月19日
月亮月亮星星
EternalIy LV36月17日
星星星星星星
qindognaaa LV35月27日
星星星星星星
無情狙擊手 LV35月14日
星星星星星星
頂部客服微信二維碼底部
>掃描二維碼關注最代碼為好友掃描二維碼關注最代碼為好友
海王捕鱼2内购破解版 重庆幸运农场走势app 山东快乐扑克3豹子遗漏 双色球手机版软件 云南11选5技巧稳赚 东软集团股吧 河北快三节目 20019快乐双彩开奖结果 广西11选五胆码 股票配资平台有哪些是正规的 上海快三是合法彩票吗