diff --git a/src/EncodingDetection.cpp b/src/EncodingDetection.cpp index 3bb5e3009..05eb4eed7 100644 --- a/src/EncodingDetection.cpp +++ b/src/EncodingDetection.cpp @@ -647,11 +647,7 @@ constexpr cpi_enc_t _MapStdEncodingString2CPI(const char* encStrg, float* pConfi bool bBOM = false; bool bReverse = false; cpi_enc_t const cpi = GetUnicodeEncoding(text, len, &bBOM, &bReverse); - if (!Encoding_IsNONE(cpiEncoding)) { - cpiEncoding = cpi; - } else { - cpiEncoding = bBOM ? (bReverse ? CPI_UNICODEBE : CPI_UNICODE) : (bReverse ? CPI_UNICODEBE : CPI_UNICODE); - } + cpiEncoding = cpi; } // check for default ANSI @@ -814,6 +810,7 @@ void Encoding_AnalyzeText(const char* const text, const size_t len, // --- re-mapping UCD ---- switch (Encoding_GetCodePage(cpiEncoding_UCD)) { + case 28591: // ISO 8859 - 1 mapped to Windows - 1252 (HTML5 Standard advice) cpiEncoding_UCD = Encoding_GetByCodePage(1252); // auto detect default ANSI (!) break; @@ -1303,6 +1300,7 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD } if (!IS_ENC_ENFORCED() || bForceEncDetection) { + if (!bSkipANSICPDetection) { // --------------------------------------------------------------------------- Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &encDetRes, iAnalyzeHint); @@ -1375,11 +1373,22 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD } else if (Encoding_IsValid(encDetRes.analyzedEncoding) && (encDetRes.bIsAnalysisReliable || !Settings.UseReliableCEDonly)) { - encDetRes.Encoding = encDetRes.analyzedEncoding; + if (!encDetRes.bIsAnalysisReliable && !Encoding_IsUTF8(encDetRes.analyzedEncoding) && encDetRes.bValidUTF8) { + encDetRes.Encoding = CPI_UTF8; // unreliable non-UTF-8 guess, but data is valid UTF-8 + } else { + encDetRes.Encoding = encDetRes.analyzedEncoding; + } + } + else if (!encDetRes.bIsAnalysisReliable && Encoding_IsValid(encDetRes.analyzedEncoding)) + { + // UCHARDET below confidence threshold (UseReliableCEDonly is true) + encDetRes.Encoding = encDetRes.bValidUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT; } else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis) && (iConfidence > 66)) { - encDetRes.Encoding = encDetRes.analyzedEncoding; // (1) rely on analyzed encoding + // unicodeAnalysis (IsTextUnicode) confirms Unicode structure, + // iConfidence is from UCHARDET analysis — use analyzedEncoding (intentional) + encDetRes.Encoding = encDetRes.analyzedEncoding; } else if (Encoding_IsValid(Encoding_SrcWeak(CPI_GET))) { diff --git a/src/uchardet/uchardet/test_moved.readme b/src/uchardet/uchardet/test_moved.readme new file mode 100644 index 000000000..7e5c8f255 --- /dev/null +++ b/src/uchardet/uchardet/test_moved.readme @@ -0,0 +1 @@ +tests are moved to Notepad3\test\test_files\encoding\uchardet\ diff --git a/src/uchardet/uchardet/test/CMakeLists.txt b/test/test_files/encoding/uchardet/CMakeLists.txt similarity index 100% rename from src/uchardet/uchardet/test/CMakeLists.txt rename to test/test_files/encoding/uchardet/CMakeLists.txt diff --git a/src/uchardet/uchardet/test/ar/iso-8859-6.txt b/test/test_files/encoding/uchardet/ar/iso-8859-6.txt similarity index 100% rename from src/uchardet/uchardet/test/ar/iso-8859-6.txt rename to test/test_files/encoding/uchardet/ar/iso-8859-6.txt diff --git a/src/uchardet/uchardet/test/ar/utf-8.txt b/test/test_files/encoding/uchardet/ar/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/ar/utf-8.txt rename to test/test_files/encoding/uchardet/ar/utf-8.txt diff --git a/src/uchardet/uchardet/test/ar/windows-1256.txt b/test/test_files/encoding/uchardet/ar/windows-1256.txt similarity index 100% rename from src/uchardet/uchardet/test/ar/windows-1256.txt rename to test/test_files/encoding/uchardet/ar/windows-1256.txt diff --git a/src/uchardet/uchardet/test/bg/windows-1251.txt b/test/test_files/encoding/uchardet/bg/windows-1251.txt similarity index 100% rename from src/uchardet/uchardet/test/bg/windows-1251.txt rename to test/test_files/encoding/uchardet/bg/windows-1251.txt diff --git a/src/uchardet/uchardet/test/cs/ibm852.txt b/test/test_files/encoding/uchardet/cs/ibm852.txt similarity index 100% rename from src/uchardet/uchardet/test/cs/ibm852.txt rename to test/test_files/encoding/uchardet/cs/ibm852.txt diff --git a/src/uchardet/uchardet/test/cs/iso-8859-2.txt b/test/test_files/encoding/uchardet/cs/iso-8859-2.txt similarity index 100% rename from src/uchardet/uchardet/test/cs/iso-8859-2.txt rename to test/test_files/encoding/uchardet/cs/iso-8859-2.txt diff --git a/src/uchardet/uchardet/test/cs/mac-centraleurope.txt b/test/test_files/encoding/uchardet/cs/mac-centraleurope.txt similarity index 100% rename from src/uchardet/uchardet/test/cs/mac-centraleurope.txt rename to test/test_files/encoding/uchardet/cs/mac-centraleurope.txt diff --git a/src/uchardet/uchardet/test/cs/utf-8.txt b/test/test_files/encoding/uchardet/cs/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/cs/utf-8.txt rename to test/test_files/encoding/uchardet/cs/utf-8.txt diff --git a/src/uchardet/uchardet/test/cs/windows-1250.txt b/test/test_files/encoding/uchardet/cs/windows-1250.txt similarity index 100% rename from src/uchardet/uchardet/test/cs/windows-1250.txt rename to test/test_files/encoding/uchardet/cs/windows-1250.txt diff --git a/src/uchardet/uchardet/test/da/ibm865.txt b/test/test_files/encoding/uchardet/da/ibm865.txt similarity index 100% rename from src/uchardet/uchardet/test/da/ibm865.txt rename to test/test_files/encoding/uchardet/da/ibm865.txt diff --git a/src/uchardet/uchardet/test/da/iso-8859-1.txt b/test/test_files/encoding/uchardet/da/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/da/iso-8859-1.txt rename to test/test_files/encoding/uchardet/da/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/da/iso-8859-15.txt b/test/test_files/encoding/uchardet/da/iso-8859-15.txt similarity index 100% rename from src/uchardet/uchardet/test/da/iso-8859-15.txt rename to test/test_files/encoding/uchardet/da/iso-8859-15.txt diff --git a/src/uchardet/uchardet/test/da/utf-8.txt b/test/test_files/encoding/uchardet/da/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/da/utf-8.txt rename to test/test_files/encoding/uchardet/da/utf-8.txt diff --git a/src/uchardet/uchardet/test/da/windows-1252.txt b/test/test_files/encoding/uchardet/da/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/da/windows-1252.txt rename to test/test_files/encoding/uchardet/da/windows-1252.txt diff --git a/src/uchardet/uchardet/test/de/iso-8859-1.txt b/test/test_files/encoding/uchardet/de/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/de/iso-8859-1.txt rename to test/test_files/encoding/uchardet/de/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/de/windows-1252.txt b/test/test_files/encoding/uchardet/de/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/de/windows-1252.txt rename to test/test_files/encoding/uchardet/de/windows-1252.txt diff --git a/src/uchardet/uchardet/test/el/iso-8859-7.txt b/test/test_files/encoding/uchardet/el/iso-8859-7.txt similarity index 100% rename from src/uchardet/uchardet/test/el/iso-8859-7.txt rename to test/test_files/encoding/uchardet/el/iso-8859-7.txt diff --git a/src/uchardet/uchardet/test/el/utf-8.txt b/test/test_files/encoding/uchardet/el/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/el/utf-8.txt rename to test/test_files/encoding/uchardet/el/utf-8.txt diff --git a/src/uchardet/uchardet/test/el/windows-1253.txt b/test/test_files/encoding/uchardet/el/windows-1253.txt similarity index 100% rename from src/uchardet/uchardet/test/el/windows-1253.txt rename to test/test_files/encoding/uchardet/el/windows-1253.txt diff --git a/src/uchardet/uchardet/test/en/ascii.txt b/test/test_files/encoding/uchardet/en/ascii.txt similarity index 100% rename from src/uchardet/uchardet/test/en/ascii.txt rename to test/test_files/encoding/uchardet/en/ascii.txt diff --git a/src/uchardet/uchardet/test/eo/iso-8859-3.txt b/test/test_files/encoding/uchardet/eo/iso-8859-3.txt similarity index 100% rename from src/uchardet/uchardet/test/eo/iso-8859-3.txt rename to test/test_files/encoding/uchardet/eo/iso-8859-3.txt diff --git a/src/uchardet/uchardet/test/es/iso-8859-1.txt b/test/test_files/encoding/uchardet/es/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/es/iso-8859-1.txt rename to test/test_files/encoding/uchardet/es/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/es/iso-8859-15.txt b/test/test_files/encoding/uchardet/es/iso-8859-15.txt similarity index 100% rename from src/uchardet/uchardet/test/es/iso-8859-15.txt rename to test/test_files/encoding/uchardet/es/iso-8859-15.txt diff --git a/src/uchardet/uchardet/test/es/utf-8.txt b/test/test_files/encoding/uchardet/es/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/es/utf-8.txt rename to test/test_files/encoding/uchardet/es/utf-8.txt diff --git a/src/uchardet/uchardet/test/es/windows-1252.txt b/test/test_files/encoding/uchardet/es/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/es/windows-1252.txt rename to test/test_files/encoding/uchardet/es/windows-1252.txt diff --git a/src/uchardet/uchardet/test/et/iso-8859-13.txt b/test/test_files/encoding/uchardet/et/iso-8859-13.txt similarity index 100% rename from src/uchardet/uchardet/test/et/iso-8859-13.txt rename to test/test_files/encoding/uchardet/et/iso-8859-13.txt diff --git a/src/uchardet/uchardet/test/et/iso-8859-15.txt b/test/test_files/encoding/uchardet/et/iso-8859-15.txt similarity index 100% rename from src/uchardet/uchardet/test/et/iso-8859-15.txt rename to test/test_files/encoding/uchardet/et/iso-8859-15.txt diff --git a/src/uchardet/uchardet/test/et/iso-8859-4.txt b/test/test_files/encoding/uchardet/et/iso-8859-4.txt similarity index 100% rename from src/uchardet/uchardet/test/et/iso-8859-4.txt rename to test/test_files/encoding/uchardet/et/iso-8859-4.txt diff --git a/src/uchardet/uchardet/test/et/utf-8.txt b/test/test_files/encoding/uchardet/et/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/et/utf-8.txt rename to test/test_files/encoding/uchardet/et/utf-8.txt diff --git a/src/uchardet/uchardet/test/et/windows-1252.txt b/test/test_files/encoding/uchardet/et/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/et/windows-1252.txt rename to test/test_files/encoding/uchardet/et/windows-1252.txt diff --git a/src/uchardet/uchardet/test/et/windows-1257.txt b/test/test_files/encoding/uchardet/et/windows-1257.txt similarity index 100% rename from src/uchardet/uchardet/test/et/windows-1257.txt rename to test/test_files/encoding/uchardet/et/windows-1257.txt diff --git a/src/uchardet/uchardet/test/fi/iso-8859-1.txt b/test/test_files/encoding/uchardet/fi/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/fi/iso-8859-1.txt rename to test/test_files/encoding/uchardet/fi/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/fi/utf-8.txt b/test/test_files/encoding/uchardet/fi/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/fi/utf-8.txt rename to test/test_files/encoding/uchardet/fi/utf-8.txt diff --git a/src/uchardet/uchardet/test/fr/iso-8859-1.txt b/test/test_files/encoding/uchardet/fr/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/fr/iso-8859-1.txt rename to test/test_files/encoding/uchardet/fr/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/fr/iso-8859-15.txt b/test/test_files/encoding/uchardet/fr/iso-8859-15.txt similarity index 100% rename from src/uchardet/uchardet/test/fr/iso-8859-15.txt rename to test/test_files/encoding/uchardet/fr/iso-8859-15.txt diff --git a/src/uchardet/uchardet/test/fr/utf-16.be b/test/test_files/encoding/uchardet/fr/utf-16.be similarity index 100% rename from src/uchardet/uchardet/test/fr/utf-16.be rename to test/test_files/encoding/uchardet/fr/utf-16.be diff --git a/src/uchardet/uchardet/test/fr/utf-32.le b/test/test_files/encoding/uchardet/fr/utf-32.le similarity index 100% rename from src/uchardet/uchardet/test/fr/utf-32.le rename to test/test_files/encoding/uchardet/fr/utf-32.le diff --git a/src/uchardet/uchardet/test/fr/utf-8.txt b/test/test_files/encoding/uchardet/fr/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/fr/utf-8.txt rename to test/test_files/encoding/uchardet/fr/utf-8.txt diff --git a/src/uchardet/uchardet/test/fr/windows-1252.txt b/test/test_files/encoding/uchardet/fr/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/fr/windows-1252.txt rename to test/test_files/encoding/uchardet/fr/windows-1252.txt diff --git a/src/uchardet/uchardet/test/ga/iso-8859-1.txt b/test/test_files/encoding/uchardet/ga/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/ga/iso-8859-1.txt rename to test/test_files/encoding/uchardet/ga/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/ga/utf-8.txt b/test/test_files/encoding/uchardet/ga/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/ga/utf-8.txt rename to test/test_files/encoding/uchardet/ga/utf-8.txt diff --git a/src/uchardet/uchardet/test/ga/windows-1252.txt b/test/test_files/encoding/uchardet/ga/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/ga/windows-1252.txt rename to test/test_files/encoding/uchardet/ga/windows-1252.txt diff --git a/src/uchardet/uchardet/test/he/iso-8859-8.txt b/test/test_files/encoding/uchardet/he/iso-8859-8.txt similarity index 100% rename from src/uchardet/uchardet/test/he/iso-8859-8.txt rename to test/test_files/encoding/uchardet/he/iso-8859-8.txt diff --git a/src/uchardet/uchardet/test/he/utf-8.txt b/test/test_files/encoding/uchardet/he/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/he/utf-8.txt rename to test/test_files/encoding/uchardet/he/utf-8.txt diff --git a/src/uchardet/uchardet/test/he/windows-1255.txt b/test/test_files/encoding/uchardet/he/windows-1255.txt similarity index 100% rename from src/uchardet/uchardet/test/he/windows-1255.txt rename to test/test_files/encoding/uchardet/he/windows-1255.txt diff --git a/src/uchardet/uchardet/test/hr/ibm852.txt b/test/test_files/encoding/uchardet/hr/ibm852.txt similarity index 100% rename from src/uchardet/uchardet/test/hr/ibm852.txt rename to test/test_files/encoding/uchardet/hr/ibm852.txt diff --git a/src/uchardet/uchardet/test/hr/iso-8859-13.txt b/test/test_files/encoding/uchardet/hr/iso-8859-13.txt similarity index 100% rename from src/uchardet/uchardet/test/hr/iso-8859-13.txt rename to test/test_files/encoding/uchardet/hr/iso-8859-13.txt diff --git a/src/uchardet/uchardet/test/hr/iso-8859-16.txt b/test/test_files/encoding/uchardet/hr/iso-8859-16.txt similarity index 100% rename from src/uchardet/uchardet/test/hr/iso-8859-16.txt rename to test/test_files/encoding/uchardet/hr/iso-8859-16.txt diff --git a/src/uchardet/uchardet/test/hr/iso-8859-2.txt b/test/test_files/encoding/uchardet/hr/iso-8859-2.txt similarity index 100% rename from src/uchardet/uchardet/test/hr/iso-8859-2.txt rename to test/test_files/encoding/uchardet/hr/iso-8859-2.txt diff --git a/src/uchardet/uchardet/test/hr/mac-centraleurope.txt b/test/test_files/encoding/uchardet/hr/mac-centraleurope.txt similarity index 100% rename from src/uchardet/uchardet/test/hr/mac-centraleurope.txt rename to test/test_files/encoding/uchardet/hr/mac-centraleurope.txt diff --git a/src/uchardet/uchardet/test/hr/utf-8.txt b/test/test_files/encoding/uchardet/hr/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/hr/utf-8.txt rename to test/test_files/encoding/uchardet/hr/utf-8.txt diff --git a/src/uchardet/uchardet/test/hr/windows-1250.txt b/test/test_files/encoding/uchardet/hr/windows-1250.txt similarity index 100% rename from src/uchardet/uchardet/test/hr/windows-1250.txt rename to test/test_files/encoding/uchardet/hr/windows-1250.txt diff --git a/src/uchardet/uchardet/test/hu/iso-8859-2.txt b/test/test_files/encoding/uchardet/hu/iso-8859-2.txt similarity index 100% rename from src/uchardet/uchardet/test/hu/iso-8859-2.txt rename to test/test_files/encoding/uchardet/hu/iso-8859-2.txt diff --git a/src/uchardet/uchardet/test/hu/windows-1250.txt b/test/test_files/encoding/uchardet/hu/windows-1250.txt similarity index 100% rename from src/uchardet/uchardet/test/hu/windows-1250.txt rename to test/test_files/encoding/uchardet/hu/windows-1250.txt diff --git a/src/uchardet/uchardet/test/it/iso-8859-1.txt b/test/test_files/encoding/uchardet/it/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/it/iso-8859-1.txt rename to test/test_files/encoding/uchardet/it/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/it/utf-8.txt b/test/test_files/encoding/uchardet/it/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/it/utf-8.txt rename to test/test_files/encoding/uchardet/it/utf-8.txt diff --git a/src/uchardet/uchardet/test/ja/euc-jp.txt b/test/test_files/encoding/uchardet/ja/euc-jp.txt similarity index 100% rename from src/uchardet/uchardet/test/ja/euc-jp.txt rename to test/test_files/encoding/uchardet/ja/euc-jp.txt diff --git a/src/uchardet/uchardet/test/ja/iso-2022-jp.txt b/test/test_files/encoding/uchardet/ja/iso-2022-jp.txt similarity index 100% rename from src/uchardet/uchardet/test/ja/iso-2022-jp.txt rename to test/test_files/encoding/uchardet/ja/iso-2022-jp.txt diff --git a/src/uchardet/uchardet/test/ja/shift_jis.txt b/test/test_files/encoding/uchardet/ja/shift_jis.txt similarity index 100% rename from src/uchardet/uchardet/test/ja/shift_jis.txt rename to test/test_files/encoding/uchardet/ja/shift_jis.txt diff --git a/src/uchardet/uchardet/test/ja/utf-16be.txt b/test/test_files/encoding/uchardet/ja/utf-16be.txt similarity index 100% rename from src/uchardet/uchardet/test/ja/utf-16be.txt rename to test/test_files/encoding/uchardet/ja/utf-16be.txt diff --git a/src/uchardet/uchardet/test/ja/utf-16le.txt b/test/test_files/encoding/uchardet/ja/utf-16le.txt similarity index 100% rename from src/uchardet/uchardet/test/ja/utf-16le.txt rename to test/test_files/encoding/uchardet/ja/utf-16le.txt diff --git a/src/uchardet/uchardet/test/ja/utf-8.txt b/test/test_files/encoding/uchardet/ja/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/ja/utf-8.txt rename to test/test_files/encoding/uchardet/ja/utf-8.txt diff --git a/src/uchardet/uchardet/test/ko/iso-2022-kr.txt b/test/test_files/encoding/uchardet/ko/iso-2022-kr.txt similarity index 100% rename from src/uchardet/uchardet/test/ko/iso-2022-kr.txt rename to test/test_files/encoding/uchardet/ko/iso-2022-kr.txt diff --git a/src/uchardet/uchardet/test/ko/uhc.smi b/test/test_files/encoding/uchardet/ko/uhc.smi similarity index 100% rename from src/uchardet/uchardet/test/ko/uhc.smi rename to test/test_files/encoding/uchardet/ko/uhc.smi diff --git a/src/uchardet/uchardet/test/ko/utf-16.le b/test/test_files/encoding/uchardet/ko/utf-16.le similarity index 100% rename from src/uchardet/uchardet/test/ko/utf-16.le rename to test/test_files/encoding/uchardet/ko/utf-16.le diff --git a/src/uchardet/uchardet/test/ko/utf-32.be b/test/test_files/encoding/uchardet/ko/utf-32.be similarity index 100% rename from src/uchardet/uchardet/test/ko/utf-32.be rename to test/test_files/encoding/uchardet/ko/utf-32.be diff --git a/src/uchardet/uchardet/test/ko/utf-8.txt b/test/test_files/encoding/uchardet/ko/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/ko/utf-8.txt rename to test/test_files/encoding/uchardet/ko/utf-8.txt diff --git a/src/uchardet/uchardet/test/lt/iso-8859-10.txt b/test/test_files/encoding/uchardet/lt/iso-8859-10.txt similarity index 100% rename from src/uchardet/uchardet/test/lt/iso-8859-10.txt rename to test/test_files/encoding/uchardet/lt/iso-8859-10.txt diff --git a/src/uchardet/uchardet/test/lt/iso-8859-13.txt b/test/test_files/encoding/uchardet/lt/iso-8859-13.txt similarity index 100% rename from src/uchardet/uchardet/test/lt/iso-8859-13.txt rename to test/test_files/encoding/uchardet/lt/iso-8859-13.txt diff --git a/src/uchardet/uchardet/test/lt/iso-8859-4.txt b/test/test_files/encoding/uchardet/lt/iso-8859-4.txt similarity index 100% rename from src/uchardet/uchardet/test/lt/iso-8859-4.txt rename to test/test_files/encoding/uchardet/lt/iso-8859-4.txt diff --git a/src/uchardet/uchardet/test/lt/utf-8.txt b/test/test_files/encoding/uchardet/lt/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/lt/utf-8.txt rename to test/test_files/encoding/uchardet/lt/utf-8.txt diff --git a/src/uchardet/uchardet/test/lv/iso-8859-10.txt b/test/test_files/encoding/uchardet/lv/iso-8859-10.txt similarity index 100% rename from src/uchardet/uchardet/test/lv/iso-8859-10.txt rename to test/test_files/encoding/uchardet/lv/iso-8859-10.txt diff --git a/src/uchardet/uchardet/test/lv/iso-8859-13.txt b/test/test_files/encoding/uchardet/lv/iso-8859-13.txt similarity index 100% rename from src/uchardet/uchardet/test/lv/iso-8859-13.txt rename to test/test_files/encoding/uchardet/lv/iso-8859-13.txt diff --git a/src/uchardet/uchardet/test/lv/iso-8859-4.txt b/test/test_files/encoding/uchardet/lv/iso-8859-4.txt similarity index 100% rename from src/uchardet/uchardet/test/lv/iso-8859-4.txt rename to test/test_files/encoding/uchardet/lv/iso-8859-4.txt diff --git a/src/uchardet/uchardet/test/lv/utf-8.txt b/test/test_files/encoding/uchardet/lv/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/lv/utf-8.txt rename to test/test_files/encoding/uchardet/lv/utf-8.txt diff --git a/src/uchardet/uchardet/test/mt/iso-8859-3.txt b/test/test_files/encoding/uchardet/mt/iso-8859-3.txt similarity index 100% rename from src/uchardet/uchardet/test/mt/iso-8859-3.txt rename to test/test_files/encoding/uchardet/mt/iso-8859-3.txt diff --git a/src/uchardet/uchardet/test/mt/utf-8.txt b/test/test_files/encoding/uchardet/mt/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/mt/utf-8.txt rename to test/test_files/encoding/uchardet/mt/utf-8.txt diff --git a/src/uchardet/uchardet/test/no/ibm865.txt b/test/test_files/encoding/uchardet/no/ibm865.txt similarity index 100% rename from src/uchardet/uchardet/test/no/ibm865.txt rename to test/test_files/encoding/uchardet/no/ibm865.txt diff --git a/src/uchardet/uchardet/test/no/iso-8859-1.txt b/test/test_files/encoding/uchardet/no/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/no/iso-8859-1.txt rename to test/test_files/encoding/uchardet/no/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/no/iso-8859-15.txt b/test/test_files/encoding/uchardet/no/iso-8859-15.txt similarity index 100% rename from src/uchardet/uchardet/test/no/iso-8859-15.txt rename to test/test_files/encoding/uchardet/no/iso-8859-15.txt diff --git a/src/uchardet/uchardet/test/no/utf-8.txt b/test/test_files/encoding/uchardet/no/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/no/utf-8.txt rename to test/test_files/encoding/uchardet/no/utf-8.txt diff --git a/src/uchardet/uchardet/test/no/windows-1252.txt b/test/test_files/encoding/uchardet/no/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/no/windows-1252.txt rename to test/test_files/encoding/uchardet/no/windows-1252.txt diff --git a/src/uchardet/uchardet/test/pl/ibm852.txt b/test/test_files/encoding/uchardet/pl/ibm852.txt similarity index 100% rename from src/uchardet/uchardet/test/pl/ibm852.txt rename to test/test_files/encoding/uchardet/pl/ibm852.txt diff --git a/src/uchardet/uchardet/test/pl/iso-8859-13.txt b/test/test_files/encoding/uchardet/pl/iso-8859-13.txt similarity index 100% rename from src/uchardet/uchardet/test/pl/iso-8859-13.txt rename to test/test_files/encoding/uchardet/pl/iso-8859-13.txt diff --git a/src/uchardet/uchardet/test/pl/iso-8859-16.txt b/test/test_files/encoding/uchardet/pl/iso-8859-16.txt similarity index 100% rename from src/uchardet/uchardet/test/pl/iso-8859-16.txt rename to test/test_files/encoding/uchardet/pl/iso-8859-16.txt diff --git a/src/uchardet/uchardet/test/pl/iso-8859-2.txt b/test/test_files/encoding/uchardet/pl/iso-8859-2.txt similarity index 100% rename from src/uchardet/uchardet/test/pl/iso-8859-2.txt rename to test/test_files/encoding/uchardet/pl/iso-8859-2.txt diff --git a/src/uchardet/uchardet/test/pl/mac-centraleurope.txt b/test/test_files/encoding/uchardet/pl/mac-centraleurope.txt similarity index 100% rename from src/uchardet/uchardet/test/pl/mac-centraleurope.txt rename to test/test_files/encoding/uchardet/pl/mac-centraleurope.txt diff --git a/src/uchardet/uchardet/test/pl/utf-8.txt b/test/test_files/encoding/uchardet/pl/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/pl/utf-8.txt rename to test/test_files/encoding/uchardet/pl/utf-8.txt diff --git a/src/uchardet/uchardet/test/pl/windows-1250.txt b/test/test_files/encoding/uchardet/pl/windows-1250.txt similarity index 100% rename from src/uchardet/uchardet/test/pl/windows-1250.txt rename to test/test_files/encoding/uchardet/pl/windows-1250.txt diff --git a/src/uchardet/uchardet/test/pt/iso-8859-1.txt b/test/test_files/encoding/uchardet/pt/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/pt/iso-8859-1.txt rename to test/test_files/encoding/uchardet/pt/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/pt/utf-8.txt b/test/test_files/encoding/uchardet/pt/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/pt/utf-8.txt rename to test/test_files/encoding/uchardet/pt/utf-8.txt diff --git a/src/uchardet/uchardet/test/ro/ibm852.txt b/test/test_files/encoding/uchardet/ro/ibm852.txt similarity index 100% rename from src/uchardet/uchardet/test/ro/ibm852.txt rename to test/test_files/encoding/uchardet/ro/ibm852.txt diff --git a/src/uchardet/uchardet/test/ro/iso-8859-16.txt b/test/test_files/encoding/uchardet/ro/iso-8859-16.txt similarity index 100% rename from src/uchardet/uchardet/test/ro/iso-8859-16.txt rename to test/test_files/encoding/uchardet/ro/iso-8859-16.txt diff --git a/src/uchardet/uchardet/test/ro/utf-8.txt b/test/test_files/encoding/uchardet/ro/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/ro/utf-8.txt rename to test/test_files/encoding/uchardet/ro/utf-8.txt diff --git a/src/uchardet/uchardet/test/ro/windows-1250.txt b/test/test_files/encoding/uchardet/ro/windows-1250.txt similarity index 100% rename from src/uchardet/uchardet/test/ro/windows-1250.txt rename to test/test_files/encoding/uchardet/ro/windows-1250.txt diff --git a/src/uchardet/uchardet/test/ru/ibm855.txt b/test/test_files/encoding/uchardet/ru/ibm855.txt similarity index 100% rename from src/uchardet/uchardet/test/ru/ibm855.txt rename to test/test_files/encoding/uchardet/ru/ibm855.txt diff --git a/src/uchardet/uchardet/test/ru/ibm866.txt b/test/test_files/encoding/uchardet/ru/ibm866.txt similarity index 100% rename from src/uchardet/uchardet/test/ru/ibm866.txt rename to test/test_files/encoding/uchardet/ru/ibm866.txt diff --git a/src/uchardet/uchardet/test/ru/iso-8859-5.txt b/test/test_files/encoding/uchardet/ru/iso-8859-5.txt similarity index 100% rename from src/uchardet/uchardet/test/ru/iso-8859-5.txt rename to test/test_files/encoding/uchardet/ru/iso-8859-5.txt diff --git a/src/uchardet/uchardet/test/ru/koi8-r.txt b/test/test_files/encoding/uchardet/ru/koi8-r.txt similarity index 100% rename from src/uchardet/uchardet/test/ru/koi8-r.txt rename to test/test_files/encoding/uchardet/ru/koi8-r.txt diff --git a/src/uchardet/uchardet/test/ru/mac-cyrillic.txt b/test/test_files/encoding/uchardet/ru/mac-cyrillic.txt similarity index 100% rename from src/uchardet/uchardet/test/ru/mac-cyrillic.txt rename to test/test_files/encoding/uchardet/ru/mac-cyrillic.txt diff --git a/src/uchardet/uchardet/test/ru/windows-1251.txt b/test/test_files/encoding/uchardet/ru/windows-1251.txt similarity index 100% rename from src/uchardet/uchardet/test/ru/windows-1251.txt rename to test/test_files/encoding/uchardet/ru/windows-1251.txt diff --git a/src/uchardet/uchardet/test/sk/ibm852.txt b/test/test_files/encoding/uchardet/sk/ibm852.txt similarity index 100% rename from src/uchardet/uchardet/test/sk/ibm852.txt rename to test/test_files/encoding/uchardet/sk/ibm852.txt diff --git a/src/uchardet/uchardet/test/sk/iso-8859-2.txt b/test/test_files/encoding/uchardet/sk/iso-8859-2.txt similarity index 100% rename from src/uchardet/uchardet/test/sk/iso-8859-2.txt rename to test/test_files/encoding/uchardet/sk/iso-8859-2.txt diff --git a/src/uchardet/uchardet/test/sk/mac-centraleurope.txt b/test/test_files/encoding/uchardet/sk/mac-centraleurope.txt similarity index 100% rename from src/uchardet/uchardet/test/sk/mac-centraleurope.txt rename to test/test_files/encoding/uchardet/sk/mac-centraleurope.txt diff --git a/src/uchardet/uchardet/test/sk/utf-8.txt b/test/test_files/encoding/uchardet/sk/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/sk/utf-8.txt rename to test/test_files/encoding/uchardet/sk/utf-8.txt diff --git a/src/uchardet/uchardet/test/sk/windows-1250.txt b/test/test_files/encoding/uchardet/sk/windows-1250.txt similarity index 100% rename from src/uchardet/uchardet/test/sk/windows-1250.txt rename to test/test_files/encoding/uchardet/sk/windows-1250.txt diff --git a/src/uchardet/uchardet/test/sl/ibm852.txt b/test/test_files/encoding/uchardet/sl/ibm852.txt similarity index 100% rename from src/uchardet/uchardet/test/sl/ibm852.txt rename to test/test_files/encoding/uchardet/sl/ibm852.txt diff --git a/src/uchardet/uchardet/test/sl/iso-8859-16.txt b/test/test_files/encoding/uchardet/sl/iso-8859-16.txt similarity index 100% rename from src/uchardet/uchardet/test/sl/iso-8859-16.txt rename to test/test_files/encoding/uchardet/sl/iso-8859-16.txt diff --git a/src/uchardet/uchardet/test/sl/iso-8859-2.txt b/test/test_files/encoding/uchardet/sl/iso-8859-2.txt similarity index 100% rename from src/uchardet/uchardet/test/sl/iso-8859-2.txt rename to test/test_files/encoding/uchardet/sl/iso-8859-2.txt diff --git a/src/uchardet/uchardet/test/sl/mac-centraleurope.txt b/test/test_files/encoding/uchardet/sl/mac-centraleurope.txt similarity index 100% rename from src/uchardet/uchardet/test/sl/mac-centraleurope.txt rename to test/test_files/encoding/uchardet/sl/mac-centraleurope.txt diff --git a/src/uchardet/uchardet/test/sl/utf-8.txt b/test/test_files/encoding/uchardet/sl/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/sl/utf-8.txt rename to test/test_files/encoding/uchardet/sl/utf-8.txt diff --git a/src/uchardet/uchardet/test/sl/windows-1250.txt b/test/test_files/encoding/uchardet/sl/windows-1250.txt similarity index 100% rename from src/uchardet/uchardet/test/sl/windows-1250.txt rename to test/test_files/encoding/uchardet/sl/windows-1250.txt diff --git a/src/uchardet/uchardet/test/sv/iso-8859-1.txt b/test/test_files/encoding/uchardet/sv/iso-8859-1.txt similarity index 100% rename from src/uchardet/uchardet/test/sv/iso-8859-1.txt rename to test/test_files/encoding/uchardet/sv/iso-8859-1.txt diff --git a/src/uchardet/uchardet/test/sv/utf-8.txt b/test/test_files/encoding/uchardet/sv/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/sv/utf-8.txt rename to test/test_files/encoding/uchardet/sv/utf-8.txt diff --git a/src/uchardet/uchardet/test/sv/windows-1252.txt b/test/test_files/encoding/uchardet/sv/windows-1252.txt similarity index 100% rename from src/uchardet/uchardet/test/sv/windows-1252.txt rename to test/test_files/encoding/uchardet/sv/windows-1252.txt diff --git a/src/uchardet/uchardet/test/th/iso-8859-11.txt b/test/test_files/encoding/uchardet/th/iso-8859-11.txt similarity index 100% rename from src/uchardet/uchardet/test/th/iso-8859-11.txt rename to test/test_files/encoding/uchardet/th/iso-8859-11.txt diff --git a/src/uchardet/uchardet/test/th/tis-620.txt b/test/test_files/encoding/uchardet/th/tis-620.txt similarity index 100% rename from src/uchardet/uchardet/test/th/tis-620.txt rename to test/test_files/encoding/uchardet/th/tis-620.txt diff --git a/src/uchardet/uchardet/test/th/utf-8.txt b/test/test_files/encoding/uchardet/th/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/th/utf-8.txt rename to test/test_files/encoding/uchardet/th/utf-8.txt diff --git a/src/uchardet/uchardet/test/tr/iso-8859-3.txt b/test/test_files/encoding/uchardet/tr/iso-8859-3.txt similarity index 100% rename from src/uchardet/uchardet/test/tr/iso-8859-3.txt rename to test/test_files/encoding/uchardet/tr/iso-8859-3.txt diff --git a/src/uchardet/uchardet/test/tr/iso-8859-9.txt b/test/test_files/encoding/uchardet/tr/iso-8859-9.txt similarity index 100% rename from src/uchardet/uchardet/test/tr/iso-8859-9.txt rename to test/test_files/encoding/uchardet/tr/iso-8859-9.txt diff --git a/src/uchardet/uchardet/test/uchardet-tests.c b/test/test_files/encoding/uchardet/uchardet-tests.c similarity index 100% rename from src/uchardet/uchardet/test/uchardet-tests.c rename to test/test_files/encoding/uchardet/uchardet-tests.c diff --git a/src/uchardet/uchardet/test/vi/utf-8.txt b/test/test_files/encoding/uchardet/vi/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/vi/utf-8.txt rename to test/test_files/encoding/uchardet/vi/utf-8.txt diff --git a/src/uchardet/uchardet/test/vi/viscii.txt b/test/test_files/encoding/uchardet/vi/viscii.txt similarity index 100% rename from src/uchardet/uchardet/test/vi/viscii.txt rename to test/test_files/encoding/uchardet/vi/viscii.txt diff --git a/src/uchardet/uchardet/test/vi/windows-1258.txt b/test/test_files/encoding/uchardet/vi/windows-1258.txt similarity index 100% rename from src/uchardet/uchardet/test/vi/windows-1258.txt rename to test/test_files/encoding/uchardet/vi/windows-1258.txt diff --git a/src/uchardet/uchardet/test/zh/big5.txt b/test/test_files/encoding/uchardet/zh/big5.txt similarity index 100% rename from src/uchardet/uchardet/test/zh/big5.txt rename to test/test_files/encoding/uchardet/zh/big5.txt diff --git a/src/uchardet/uchardet/test/zh/euc-tw.txt b/test/test_files/encoding/uchardet/zh/euc-tw.txt similarity index 100% rename from src/uchardet/uchardet/test/zh/euc-tw.txt rename to test/test_files/encoding/uchardet/zh/euc-tw.txt diff --git a/src/uchardet/uchardet/test/zh/gb18030.txt b/test/test_files/encoding/uchardet/zh/gb18030.txt similarity index 100% rename from src/uchardet/uchardet/test/zh/gb18030.txt rename to test/test_files/encoding/uchardet/zh/gb18030.txt diff --git a/src/uchardet/uchardet/test/zh/utf-8.txt b/test/test_files/encoding/uchardet/zh/utf-8.txt similarity index 100% rename from src/uchardet/uchardet/test/zh/utf-8.txt rename to test/test_files/encoding/uchardet/zh/utf-8.txt