|
@@ -0,0 +1,883 @@
|
|
|
+//
|
|
|
+// Source code recreated from a .class file by IntelliJ IDEA
|
|
|
+// (powered by Fernflower decompiler)
|
|
|
+//
|
|
|
+
|
|
|
+package com.zglc.fm.utils;
|
|
|
+
|
|
|
+import java.io.Closeable;
|
|
|
+import java.io.File;
|
|
|
+import java.io.FileInputStream;
|
|
|
+import java.io.InputStream;
|
|
|
+import java.net.URL;
|
|
|
+
|
|
|
/**
 * Heuristic detector for the character encoding of CJK text.
 *
 * <p>Each {@code detect} overload loads the bytes, scores them against every supported
 * encoding and returns the Java charset name of the best-scoring candidate.
 *
 * <p>This class was recreated from a {@code .class} file. The decompiler could not recover
 * {@code BytesEncodingDetect.initialize_frequencies()}, so the character-frequency tables
 * are all zero here and only the byte-range heuristics (plus the fixed fallback weights)
 * contribute to the scores.
 */
public class EncodingDetect {
    public EncodingDetect() {
    }

    /**
     * Detects the encoding of the file at {@code path}.
     *
     * @param path file system path of the file to inspect
     * @return the Java charset name of the best guess
     */
    public static String detect(String path) {
        return detect(new File(path));
    }

    /**
     * Detects the encoding of {@code file}.
     *
     * @param file the file to inspect; if it cannot be read an error is printed to
     *             {@code System.err} and whatever was read so far is scored
     * @return the Java charset name of the best guess
     */
    public static String detect(File file) {
        BytesEncodingDetect detector = new BytesEncodingDetect();
        return BytesEncodingDetect.javaname[detector.detectEncoding(file)];
    }

    /**
     * Detects the encoding of an in-memory byte sequence.
     *
     * @param contents the raw bytes to inspect (may be empty)
     * @return the Java charset name of the best guess
     */
    public static String detect(byte[] contents) {
        BytesEncodingDetect detector = new BytesEncodingDetect();
        return BytesEncodingDetect.javaname[detector.detectEncoding(contents)];
    }

    /**
     * Detects the encoding of the content behind {@code url}.
     *
     * @param url the location to read from
     * @return the Java charset name of the best guess; when the URL cannot be read the
     *         name registered for {@code OTHER} is returned
     */
    public static String detect(URL url) {
        BytesEncodingDetect detector = new BytesEncodingDetect();
        int guess = detector.detectEncoding(url);
        // detectEncoding(URL) reports a load failure as -1, which is not a valid index.
        if (guess < 0) {
            guess = Encoding.OTHER;
        }
        return BytesEncodingDetect.javaname[guess];
    }

    /**
     * Encoding identifiers and the three name tables indexed by them.
     */
    static class Encoding {
        public static final int GB2312 = 0;
        public static final int GBK = 1;
        public static final int GB18030 = 2;
        public static final int HZ = 3;
        public static final int BIG5 = 4;
        public static final int CNS11643 = 5;
        public static final int UTF8 = 6;
        public static final int UTF8T = 7;
        public static final int UTF8S = 8;
        public static final int UNICODE = 9;
        public static final int UNICODET = 10;
        public static final int UNICODES = 11;
        public static final int ISO2022CN = 12;
        public static final int ISO2022CN_CNS = 13;
        public static final int ISO2022CN_GB = 14;
        public static final int EUC_KR = 15;
        public static final int CP949 = 16;
        public static final int ISO2022KR = 17;
        public static final int JOHAB = 18;
        public static final int SJIS = 19;
        public static final int EUC_JP = 20;
        public static final int ISO2022JP = 21;
        public static final int ASCII = 22;
        public static final int OTHER = 23;
        public static final int TOTALTYPES = 24;
        public static final int SIMP = 0;
        public static final int TRAD = 1;

        /** Java charset name per encoding id. */
        public static final String[] javaname = new String[TOTALTYPES];
        /** Human-readable name per encoding id. */
        public static final String[] nicename = new String[TOTALTYPES];
        /** HTML/MIME charset name per encoding id. */
        public static final String[] htmlname = new String[TOTALTYPES];

        // The tables are filled exactly once, at class initialisation, instead of being
        // reallocated by every constructor call while other threads may be reading them.
        static {
            register(GB2312, "GB2312", "GB2312", "GB-2312");
            register(GBK, "GBK", "GBK", "GBK");
            register(GB18030, "GB18030", "GB18030", "GB18030");
            register(HZ, "ASCII", "HZ-GB-2312", "HZ");
            register(ISO2022CN_GB, "ISO2022CN_GB", "ISO-2022-CN-EXT", "ISO2022CN-GB");
            register(BIG5, "BIG5", "BIG5", "Big5");
            register(CNS11643, "EUC-TW", "EUC-TW", "CNS11643");
            register(ISO2022CN_CNS, "ISO2022CN_CNS", "ISO-2022-CN-EXT", "ISO2022CN-CNS");
            register(ISO2022CN, "ISO2022CN", "ISO-2022-CN", "ISO2022 CN");
            register(UTF8, "UTF-8", "UTF-8", "UTF-8");
            register(UTF8T, "UTF-8", "UTF-8", "UTF-8 (Trad)");
            register(UTF8S, "UTF-8", "UTF-8", "UTF-8 (Simp)");
            register(UNICODE, "Unicode", "UTF-16", "Unicode");
            register(UNICODET, "Unicode", "UTF-16", "Unicode (Trad)");
            register(UNICODES, "Unicode", "UTF-16", "Unicode (Simp)");
            register(EUC_KR, "EUC_KR", "EUC-KR", "EUC-KR");
            register(CP949, "MS949", "x-windows-949", "CP949");
            register(ISO2022KR, "ISO2022KR", "ISO-2022-KR", "ISO 2022 KR");
            register(JOHAB, "Johab", "x-Johab", "Johab");
            register(SJIS, "SJIS", "Shift_JIS", "Shift-JIS");
            register(EUC_JP, "EUC_JP", "EUC-JP", "EUC-JP");
            register(ISO2022JP, "ISO2022JP", "ISO-2022-JP", "ISO 2022 JP");
            register(ASCII, "ASCII", "ASCII", "ASCII");
            register(OTHER, "ISO8859_1", "ISO8859-1", "OTHER");
        }

        public Encoding() {
        }

        /** Stores the three names of one encoding id in the lookup tables. */
        private static void register(int type, String java, String html, String nice) {
            javaname[type] = java;
            htmlname[type] = html;
            nicename[type] = nice;
        }
    }

    /**
     * Scores a byte sequence against every supported encoding.
     *
     * <p>Most scorers return {@code rangeval + freqval}, each worth up to 50 points:
     * {@code rangeval} is the share of high-bit bytes that form a legal two-byte sequence,
     * {@code freqval} the averaged table frequency of those sequences.
     */
    static class BytesEncodingDetect extends EncodingDetect.Encoding {
        /** Number of bytes sampled from a URL. */
        private static final int URL_SAMPLE_BYTES = 10000;
        /** The escape byte that introduces ISO-2022 designators. */
        private static final byte ESC = 27;

        final int[][] GBFreq = new int[94][94];
        final int[][] GBKFreq = new int[126][191];
        final int[][] Big5Freq = new int[94][158];
        final int[][] Big5PFreq = new int[126][191];
        final int[][] EUC_TWFreq = new int[94][94];
        final int[][] KRFreq = new int[94][94];
        final int[][] JPFreq = new int[94][94];
        boolean debug = false;

        BytesEncodingDetect() {
            this.initialize_frequencies();
        }

        /**
         * Reads up to 10000 bytes from {@code testurl} and scores them. The buffer is passed
         * on at full size, so shorter content is followed by zero bytes.
         *
         * @param testurl the location to read
         * @return the encoding id of the best guess, or -1 when the URL could not be read
         */
        int detectEncoding(URL testurl) {
            byte[] rawText = new byte[URL_SAMPLE_BYTES];
            try (InputStream is = testurl.openStream()) {
                int byteOffset = 0;
                int bytesRead;
                // read() returns 0 once the buffer is full and -1 at end of stream.
                while ((bytesRead = is.read(rawText, byteOffset, rawText.length - byteOffset)) > 0) {
                    byteOffset += bytesRead;
                }
            } catch (Exception e) {
                System.err.println("Error loading or using URL " + e.toString());
                return -1;
            }
            return this.detectEncoding(rawText);
        }

        /**
         * Loads {@code testfile} completely and scores its content.
         *
         * <p>A read failure is reported on {@code System.err}; scoring then continues with the
         * bytes obtained so far (the rest of the buffer stays zero).
         *
         * @param testfile the file to read
         * @return the encoding id of the best guess
         */
        int detectEncoding(File testfile) {
            // Clamp instead of casting so that a file above 2 GiB cannot yield a negative size.
            int size = (int) Math.min(testfile.length(), (long) (Integer.MAX_VALUE - 8));
            byte[] rawtext = new byte[size];

            try (FileInputStream in = new FileInputStream(testfile)) {
                int filled = 0;
                int count;
                // A single read() may deliver fewer bytes than requested, so keep reading.
                while (filled < size && (count = in.read(rawtext, filled, size - filled)) > 0) {
                    filled += count;
                }
            } catch (Exception e) {
                System.err.println("Error: " + e);
                System.err.println();
            }

            return this.detectEncoding(rawtext);
        }

        /**
         * Runs every scorer over {@code rawtext} and picks the highest score.
         *
         * @param rawtext the bytes to classify
         * @return the encoding id with the highest score (the lowest id wins a tie), or
         *         {@code OTHER} when no score exceeds 50
         */
        int detectEncoding(byte[] rawtext) {
            // Ids without a scorer (JOHAB, UNICODET, UNICODES, ISO2022CN_GB, ISO2022CN_CNS,
            // UTF8T, UTF8S, OTHER) keep the default score 0.
            int[] scores = new int[TOTALTYPES];
            scores[GB2312] = this.gb2312_probability(rawtext);
            scores[GBK] = this.gbk_probability(rawtext);
            scores[GB18030] = this.gb18030_probability(rawtext);
            scores[HZ] = this.hz_probability(rawtext);
            scores[BIG5] = this.big5_probability(rawtext);
            scores[CNS11643] = this.euc_tw_probability(rawtext);
            scores[ISO2022CN] = this.iso_2022_cn_probability(rawtext);
            scores[UTF8] = this.utf8_probability(rawtext);
            scores[UNICODE] = this.utf16_probability(rawtext);
            scores[EUC_KR] = this.euc_kr_probability(rawtext);
            scores[CP949] = this.cp949_probability(rawtext);
            scores[ISO2022KR] = this.iso_2022_kr_probability(rawtext);
            scores[ASCII] = this.ascii_probability(rawtext);
            scores[SJIS] = this.sjis_probability(rawtext);
            scores[EUC_JP] = this.euc_jp_probability(rawtext);
            scores[ISO2022JP] = this.iso_2022_jp_probability(rawtext);

            int bestType = OTHER;
            int bestScore = 0;
            for (int type = 0; type < TOTALTYPES; ++type) {
                if (this.debug) {
                    System.err.println("Encoding " + nicename[type] + " score " + scores[type]);
                }
                if (scores[type] > bestScore) {
                    bestType = type;
                    bestScore = scores[type];
                }
            }

            return bestScore <= 50 ? OTHER : bestType;
        }

        /** Returns true when the unsigned value of {@code b} lies in {@code [lo, hi]}. */
        private static boolean inRange(byte b, int lo, int hi) {
            int value = b & 0xFF;
            return lo <= value && value <= hi;
        }

        /** Combines the legal-sequence ratio and the averaged frequency, 50 points each. */
        private static int blend(int matched, int candidates, long freq, long totalFreq) {
            float rangeval = 50.0F * ((float) matched / (float) candidates);
            float freqval = 50.0F * ((float) freq / (float) totalFreq);
            return (int) (rangeval + freqval);
        }

        /** GB2312 table weight; rows 15..54 without a table entry get the flat weight 200. */
        private long gbWeight(int row, int column) {
            int freq = this.GBFreq[row][column];
            if (freq != 0) {
                return freq;
            }
            return (15 <= row && row < 55) ? 200L : 0L;
        }

        /** EUC-TW table weight; rows 35..92 without a table entry get the flat weight 150. */
        private long eucTwWeight(int row, int column) {
            int freq = this.EUC_TWFreq[row][column];
            if (freq != 0) {
                return freq;
            }
            return (35 <= row && row <= 92) ? 150L : 0L;
        }

        /**
         * Scores {@code rawtext} as GB2312 (lead 0xA1-0xF7, trail 0xA1-0xFE).
         *
         * @return a score from 0 to 100
         */
        int gb2312_probability(byte[] rawtext) {
            int dbchars = 1;
            int gbchars = 1;
            long gbfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                if (inRange(rawtext[i], 0xA1, 0xF7) && inRange(rawtext[i + 1], 0xA1, 0xFE)) {
                    ++gbchars;
                    totalfreq += 500L;
                    gbfreq += gbWeight((rawtext[i] & 0xFF) - 0xA1, (rawtext[i + 1] & 0xFF) - 0xA1);
                }
                // A high-bit byte always consumes the byte that follows it.
                ++i;
            }

            return blend(gbchars, dbchars, gbfreq, totalfreq);
        }

        /**
         * Scores {@code rawtext} as GBK: GB2312 pairs plus the extended range
         * (lead 0x81-0xFE, trail 0x40-0x7E or 0x80-0xFE).
         *
         * @return the blended score minus 1, so plain GB2312 wins a tie
         */
        int gbk_probability(byte[] rawtext) {
            int dbchars = 1;
            int gbchars = 1;
            long gbfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                byte lead = rawtext[i];
                byte trail = rawtext[i + 1];
                if (inRange(lead, 0xA1, 0xF7) && inRange(trail, 0xA1, 0xFE)) {
                    ++gbchars;
                    totalfreq += 500L;
                    gbfreq += gbWeight((lead & 0xFF) - 0xA1, (trail & 0xFF) - 0xA1);
                } else if (inRange(lead, 0x81, 0xFE)
                        && (inRange(trail, 0x80, 0xFE) || inRange(trail, 0x40, 0x7E))) {
                    ++gbchars;
                    totalfreq += 500L;
                    gbfreq += this.GBKFreq[(lead & 0xFF) - 0x81][(trail & 0xFF) - 0x40];
                }
                ++i;
            }

            return blend(gbchars, dbchars, gbfreq, totalfreq) - 1;
        }

        /**
         * Scores {@code rawtext} as GB18030: the GBK rules plus four-byte sequences
         * (0x81-0xFE, digit, 0x81-0xFE, digit), which count as legal but carry no frequency.
         *
         * @return the blended score minus 1
         */
        int gb18030_probability(byte[] rawtext) {
            int dbchars = 1;
            int gbchars = 1;
            long gbfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                byte lead = rawtext[i];
                byte trail = rawtext[i + 1];
                if (inRange(lead, 0xA1, 0xF7) && inRange(trail, 0xA1, 0xFE)) {
                    ++gbchars;
                    totalfreq += 500L;
                    gbfreq += gbWeight((lead & 0xFF) - 0xA1, (trail & 0xFF) - 0xA1);
                } else if (inRange(lead, 0x81, 0xFE)
                        && (inRange(trail, 0x80, 0xFE) || inRange(trail, 0x40, 0x7E))) {
                    ++gbchars;
                    totalfreq += 500L;
                    gbfreq += this.GBKFreq[(lead & 0xFF) - 0x81][(trail & 0xFF) - 0x40];
                } else if (inRange(lead, 0x81, 0xFE) && i + 3 < len
                        && inRange(trail, '0', '9')
                        && inRange(rawtext[i + 2], 0x81, 0xFE)
                        && inRange(rawtext[i + 3], '0', '9')) {
                    // Only one extra byte is skipped below, as in the decompiled code.
                    ++gbchars;
                }
                ++i;
            }

            return blend(gbchars, dbchars, gbfreq, totalfreq) - 1;
        }

        /**
         * Scores {@code rawtext} as HZ, where GB text is bracketed by {@code ~{} and {@code ~}}.
         *
         * <p>The range part depends only on how many {@code ~{} openers were seen
         * (more than 4: 50, more than 1: 41, at least 1: 39, none: 0).
         *
         * @return a score from 0 to 100
         */
        int hz_probability(byte[] rawtext) {
            int hzstart = 0;
            long hzfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len; ++i) {
                // A '~' in the very last position has no follower to inspect.
                if (rawtext[i] != '~' || i + 1 >= len) {
                    continue;
                }
                byte next = rawtext[i + 1];
                if (next == '}' || next == '~') {
                    // Closing marker or escaped tilde: skip the second byte.
                    ++i;
                } else if (next == '{') {
                    ++hzstart;
                    // Walk the shifted-in section two bytes at a time until the line ends.
                    for (i += 2; i < len - 1 && rawtext[i] != '\n' && rawtext[i] != '\r'; i += 2) {
                        if (rawtext[i] == '~' && rawtext[i + 1] == '}') {
                            ++i;
                            break;
                        }
                        if (inRange(rawtext[i], 0x21, 0x77) && inRange(rawtext[i + 1], 0x21, 0x77)) {
                            totalfreq += 500L;
                            hzfreq += gbWeight(rawtext[i] - 0x21, rawtext[i + 1] - 0x21);
                        }
                    }
                }
            }

            float rangeval;
            if (hzstart > 4) {
                rangeval = 50.0F;
            } else if (hzstart > 1) {
                rangeval = 41.0F;
            } else if (hzstart > 0) {
                rangeval = 39.0F;
            } else {
                rangeval = 0.0F;
            }

            float freqval = 50.0F * ((float) hzfreq / (float) totalfreq);
            return (int) (rangeval + freqval);
        }

        /**
         * Scores {@code rawtext} as Big5 (lead 0xA1-0xF9, trail 0x40-0x7E or 0xA1-0xFE).
         *
         * @return a score from 0 to 100
         */
        int big5_probability(byte[] rawtext) {
            int dbchars = 1;
            int bfchars = 1;
            long bffreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                byte trail = rawtext[i + 1];
                boolean lowTrail = inRange(trail, 0x40, 0x7E);
                if (inRange(rawtext[i], 0xA1, 0xF9) && (lowTrail || inRange(trail, 0xA1, 0xFE))) {
                    ++bfchars;
                    totalfreq += 500L;
                    int row = (rawtext[i] & 0xFF) - 0xA1;
                    // The two trail ranges are packed back to back: 0x40.. -> 0, 0xA1.. -> 64.
                    int column = lowTrail ? trail - 0x40 : (trail & 0xFF) - 0x61;
                    if (this.Big5Freq[row][column] != 0) {
                        bffreq += (long) this.Big5Freq[row][column];
                    } else if (3 <= row && row <= 37) {
                        bffreq += 200L;
                    }
                }
                ++i;
            }

            return blend(bfchars, dbchars, bffreq, totalfreq);
        }

        /**
         * Scores {@code rawtext} as Big5+: Big5 pairs plus the extended range
         * (lead 0x81-0xFE, trail 0x40-0x7E or 0x80-0xFE).
         *
         * <p>All byte values are compared as unsigned numbers; comparing the signed bytes
         * against 128..254 could never match.
         *
         * @return the blended score minus 1
         */
        int big5plus_probability(byte[] rawtext) {
            int dbchars = 1;
            int bfchars = 1;
            long bffreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                int lead = rawtext[i] & 0xFF;
                if (lead < 0x80) {
                    continue;
                }
                ++dbchars;
                int trail = rawtext[i + 1] & 0xFF;
                boolean lowTrail = 0x40 <= trail && trail <= 0x7E;
                if (0xA1 <= lead && lead <= 0xF9 && (lowTrail || (0xA1 <= trail && trail <= 0xFE))) {
                    ++bfchars;
                    totalfreq += 500L;
                    int row = lead - 0xA1;
                    int column = lowTrail ? trail - 0x40 : trail - 0x61;
                    if (this.Big5Freq[row][column] != 0) {
                        bffreq += (long) this.Big5Freq[row][column];
                    } else if (3 <= row && row < 37) {
                        bffreq += 200L;
                    }
                } else if (0x81 <= lead && lead <= 0xFE
                        && (lowTrail || (0x80 <= trail && trail <= 0xFE))) {
                    ++bfchars;
                    totalfreq += 500L;
                    bffreq += (long) this.Big5PFreq[lead - 0x81][trail - 0x40];
                }
                ++i;
            }

            return blend(bfchars, dbchars, bffreq, totalfreq) - 1;
        }

        /**
         * Scores {@code rawtext} as EUC-TW (CNS 11643): four-byte sequences introduced by 0x8E
         * and two-byte pairs with both bytes in 0xA1-0xFE.
         *
         * @return a score from 0 to 100
         */
        int euc_tw_probability(byte[] rawtext) {
            int dbchars = 1;
            int cnschars = 1;
            long cnsfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                if (i + 3 < len && rawtext[i] == (byte) 0x8E
                        && inRange(rawtext[i + 1], 0xA1, 0xB0)
                        && inRange(rawtext[i + 2], 0xA1, 0xFE)
                        && inRange(rawtext[i + 3], 0xA1, 0xFE)) {
                    ++cnschars;
                    i += 3;
                } else if (inRange(rawtext[i], 0xA1, 0xFE) && inRange(rawtext[i + 1], 0xA1, 0xFE)) {
                    ++cnschars;
                    totalfreq += 500L;
                    cnsfreq += eucTwWeight((rawtext[i] & 0xFF) - 0xA1, (rawtext[i + 1] & 0xFF) - 0xA1);
                    ++i;
                }
                // Unlike the other scorers, an unmatched high-bit byte does not skip its follower.
            }

            return blend(cnschars, dbchars, cnsfreq, totalfreq);
        }

        /**
         * Scores {@code rawtext} as ISO-2022-CN by scanning the sections opened with
         * {@code ESC $ ) A} (GB2312) and {@code ESC $ ) G} (CNS 11643 plane 1).
         *
         * <p>A section ends at the next ESC byte or at the end of the input.
         *
         * @return a score from 0 to 100
         */
        int iso_2022_cn_probability(byte[] rawtext) {
            int dbchars = 1;
            int isochars = 1;
            long isofreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] != ESC || i + 3 >= len) {
                    continue;
                }
                boolean designator = rawtext[i + 1] == '$' && rawtext[i + 2] == ')';
                if (designator && rawtext[i + 3] == 'A') {
                    for (i += 4; i < len && rawtext[i] != ESC; ++i) {
                        ++dbchars;
                        if (i + 1 < len && inRange(rawtext[i], 0x21, 0x77)
                                && inRange(rawtext[i + 1], 0x21, 0x77)) {
                            ++isochars;
                            totalfreq += 500L;
                            isofreq += gbWeight(rawtext[i] - 0x21, rawtext[i + 1] - 0x21);
                            ++i;
                        }
                    }
                } else if (designator && rawtext[i + 3] == 'G') {
                    for (i += 4; i < len && rawtext[i] != ESC; ++i) {
                        ++dbchars;
                        if (i + 1 < len && inRange(rawtext[i], 0x21, 0x7E)
                                && inRange(rawtext[i + 1], 0x21, 0x7E)) {
                            ++isochars;
                            totalfreq += 500L;
                            isofreq += eucTwWeight(rawtext[i] - 0x21, rawtext[i + 1] - 0x21);
                            ++i;
                        }
                    }
                }

                // Skip the "ESC ( B" designator that may follow the section.
                if (i + 2 < len && rawtext[i] == ESC && rawtext[i + 1] == '(' && rawtext[i + 2] == 'B') {
                    i += 2;
                }
            }

            return blend(isochars, dbchars, isofreq, totalfreq);
        }

        /**
         * Scores {@code rawtext} as UTF-8 from the share of non-ASCII bytes that belong to
         * well-formed two- or three-byte sequences.
         *
         * @return that percentage when it exceeds 98, or when it exceeds 95 with more than 30
         *         well-formed bytes; otherwise 0 (also for pure ASCII input)
         */
        int utf8_probability(byte[] rawtext) {
            int len = rawtext.length;
            int goodbytes = 0;
            int asciibytes = 0;

            for (int i = 0; i < len; ++i) {
                if (rawtext[i] >= 0) {
                    ++asciibytes;
                } else if (inRange(rawtext[i], 0xC0, 0xDF) && i + 1 < len
                        && inRange(rawtext[i + 1], 0x80, 0xBF)) {
                    goodbytes += 2;
                    ++i;
                } else if (inRange(rawtext[i], 0xE0, 0xEF) && i + 2 < len
                        && inRange(rawtext[i + 1], 0x80, 0xBF)
                        && inRange(rawtext[i + 2], 0x80, 0xBF)) {
                    goodbytes += 3;
                    i += 2;
                }
            }

            if (asciibytes == len) {
                return 0;
            }

            int score = (int) (100.0F * ((float) goodbytes / (float) (len - asciibytes)));
            boolean convincing = score > 98 || (score > 95 && goodbytes > 30);
            return convincing ? score : 0;
        }

        /**
         * Scores {@code rawtext} as UTF-16 by looking for a byte-order mark.
         *
         * @return 100 when the input starts with FE FF or FF FE, otherwise 0 (including input
         *         shorter than two bytes)
         */
        int utf16_probability(byte[] rawtext) {
            if (rawtext.length < 2) {
                return 0;
            }
            boolean bigEndian = rawtext[0] == (byte) 0xFE && rawtext[1] == (byte) 0xFF;
            boolean littleEndian = rawtext[0] == (byte) 0xFF && rawtext[1] == (byte) 0xFE;
            return (bigEndian || littleEndian) ? 100 : 0;
        }

        /**
         * Scores {@code rawtext} as plain ASCII: starts at 75 and loses 5 points for every
         * high-bit byte and every ESC byte.
         *
         * @return the remaining score, or 0 once it is used up
         */
        int ascii_probability(byte[] rawtext) {
            int score = 75;

            for (int i = 0; i < rawtext.length; ++i) {
                if (rawtext[i] < 0 || rawtext[i] == ESC) {
                    score -= 5;
                }
                if (score <= 0) {
                    return 0;
                }
            }

            return score;
        }

        /**
         * Scores {@code rawtext} as EUC-KR (both bytes in 0xA1-0xFE).
         *
         * @return a score from 0 to 100
         */
        int euc_kr_probability(byte[] rawtext) {
            int dbchars = 1;
            int krchars = 1;
            long krfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                if (inRange(rawtext[i], 0xA1, 0xFE) && inRange(rawtext[i + 1], 0xA1, 0xFE)) {
                    ++krchars;
                    totalfreq += 500L;
                    krfreq += (long) this.KRFreq[(rawtext[i] & 0xFF) - 0xA1][(rawtext[i + 1] & 0xFF) - 0xA1];
                }
                ++i;
            }

            return blend(krchars, dbchars, krfreq, totalfreq);
        }

        /**
         * Scores {@code rawtext} as CP949 (lead 0x81-0xFE; trail A-Z, a-z or 0x81-0xFE).
         * Only pairs inside the EUC-KR range contribute a table frequency.
         *
         * @return a score from 0 to 100
         */
        int cp949_probability(byte[] rawtext) {
            int dbchars = 1;
            int krchars = 1;
            long krfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                byte lead = rawtext[i];
                byte trail = rawtext[i + 1];
                boolean legalTrail = inRange(trail, 'A', 'Z') || inRange(trail, 'a', 'z')
                        || inRange(trail, 0x81, 0xFE);
                if (inRange(lead, 0x81, 0xFE) && legalTrail) {
                    ++krchars;
                    totalfreq += 500L;
                    if (inRange(lead, 0xA1, 0xFE) && inRange(trail, 0xA1, 0xFE)) {
                        krfreq += (long) this.KRFreq[(lead & 0xFF) - 0xA1][(trail & 0xFF) - 0xA1];
                    }
                }
                ++i;
            }

            return blend(krchars, dbchars, krfreq, totalfreq);
        }

        /**
         * Scores {@code rawtext} as ISO-2022-KR.
         *
         * @return 100 when the designator {@code ESC $ ) C} occurs anywhere, otherwise 0
         */
        int iso_2022_kr_probability(byte[] rawtext) {
            for (int i = 0; i + 3 < rawtext.length; ++i) {
                if (rawtext[i] == ESC && rawtext[i + 1] == '$' && rawtext[i + 2] == ')'
                        && rawtext[i + 3] == 'C') {
                    return 100;
                }
            }
            return 0;
        }

        /**
         * Scores {@code rawtext} as EUC-JP (both bytes in 0xA1-0xFE).
         *
         * @return a score from 0 to 100
         */
        int euc_jp_probability(byte[] rawtext) {
            int dbchars = 1;
            int jpchars = 1;
            long jpfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                if (inRange(rawtext[i], 0xA1, 0xFE) && inRange(rawtext[i + 1], 0xA1, 0xFE)) {
                    ++jpchars;
                    totalfreq += 500L;
                    jpfreq += (long) this.JPFreq[(rawtext[i] & 0xFF) - 0xA1][(rawtext[i + 1] & 0xFF) - 0xA1];
                }
                ++i;
            }

            return blend(jpchars, dbchars, jpfreq, totalfreq);
        }

        /**
         * Scores {@code rawtext} as ISO-2022-JP.
         *
         * @return 100 when the designator {@code ESC $ B} occurs anywhere, otherwise 0
         */
        int iso_2022_jp_probability(byte[] rawtext) {
            for (int i = 0; i + 2 < rawtext.length; ++i) {
                if (rawtext[i] == ESC && rawtext[i + 1] == '$' && rawtext[i + 2] == 'B') {
                    return 100;
                }
            }
            return 0;
        }

        /**
         * Scores {@code rawtext} as Shift-JIS (lead 0x81-0x9F or 0xE0-0xEF, trail 0x40-0x7E or
         * 0x80-0xFC).
         *
         * @return the blended score minus 1
         */
        int sjis_probability(byte[] rawtext) {
            int dbchars = 1;
            int jpchars = 1;
            long jpfreq = 0L;
            long totalfreq = 1L;
            int len = rawtext.length;

            for (int i = 0; i < len - 1; ++i) {
                if (rawtext[i] >= 0) {
                    continue;
                }
                ++dbchars;
                byte lead = rawtext[i];
                byte trail = rawtext[i + 1];
                boolean legalLead = inRange(lead, 0x81, 0x9F) || inRange(lead, 0xE0, 0xEF);
                boolean legalTrail = inRange(trail, 0x40, 0x7E) || inRange(trail, 0x80, 0xFC);
                if (legalLead && legalTrail) {
                    ++jpchars;
                    totalfreq += 500L;

                    // Trail bytes below 0x9F select the first row of each row pair. The trail
                    // byte is read unsigned so that 0x40-0x7E is classified correctly.
                    int adjust = (trail & 0xFF) < 0x9F ? 1 : 0;
                    int row = lead & 0xFF;
                    row = ((row - (row < 0xA0 ? 0x70 : 0xB0)) << 1) - adjust - 0x20;
                    // NOTE(review): the lookup column is pinned to 32 exactly as in the
                    // decompiled code; it looks like a lost "column -= 32" - confirm against
                    // the real frequency table before changing it.
                    int column = 32;
                    if (row < this.JPFreq.length && column < this.JPFreq[row].length) {
                        jpfreq += (long) this.JPFreq[row][column];
                    }

                    // Only a matched pair consumes the trail byte in this scorer.
                    ++i;
                }
            }

            return blend(jpchars, dbchars, jpfreq, totalfreq) - 1;
        }

        /**
         * Fills the frequency tables.
         *
         * <p>The decompiler could not recover this method, so the body is empty and every table
         * stays zero. Restore the table data from the original source to enable the frequency
         * part of the scores.
         */
        void initialize_frequencies() {
            // $FF: Couldn't be decompiled
        }
    }
}
|