aboutsummaryrefslogtreecommitdiff
path: root/Src/external_dependencies/openmpt-trunk/common/mptString.h
diff options
context:
space:
mode:
authorJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
committerJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
commit20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/external_dependencies/openmpt-trunk/common/mptString.h
parent537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
downloadwinamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz
Initial community commit
Diffstat (limited to 'Src/external_dependencies/openmpt-trunk/common/mptString.h')
-rw-r--r--Src/external_dependencies/openmpt-trunk/common/mptString.h446
1 files changed, 446 insertions, 0 deletions
diff --git a/Src/external_dependencies/openmpt-trunk/common/mptString.h b/Src/external_dependencies/openmpt-trunk/common/mptString.h
new file mode 100644
index 00000000..c308eef9
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/common/mptString.h
@@ -0,0 +1,446 @@
+/*
+ * mptString.h
+ * ----------
+ * Purpose: Small string-related utilities, number and message formatting.
+ * Notes : Currently none.
+ * Authors: OpenMPT Devs
+ * The OpenMPT source code is released under the BSD license. Read LICENSE for more details.
+ */
+
+
+#pragma once
+
+#include "openmpt/all/BuildSettings.hpp"
+
+#include "mpt/base/alloc.hpp"
+#include "mpt/base/span.hpp"
+#include "mpt/string/types.hpp"
+#include "mpt/string/utility.hpp"
+
+#include "mptBaseTypes.h"
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <string_view>
+
+#include <cstring>
+
+
+
+OPENMPT_NAMESPACE_BEGIN
+
+
+namespace mpt
+{
+
+
+
+namespace String
+{
+
+// Forwards to mpt::replace(str, oldStr, newStr) and returns its result. str is taken by value, so the caller's string object is left untouched. NOTE(review): presumably every occurrence of oldStr is replaced by newStr - confirm against mpt::replace.
+template <typename Tstring, typename Tstring2, typename Tstring3>
+inline Tstring Replace(Tstring str, const Tstring2 &oldStr, const Tstring3 &newStr)
+{
+ return mpt::replace(str, oldStr, newStr);
+}
+
+
+} // namespace String
+
+// Character sets that can be named as the source ('from') or destination ('to') of the conversion functions declared in this header.
+enum class Charset {
+
+ UTF8,
+
+ ASCII, // strictly 7-bit ASCII
+
+ ISO8859_1,
+ ISO8859_15,
+
+ CP850,
+ CP437,
+ CP437AMS,
+ CP437AMS2,
+
+ Windows1252,
+
+ Amiga,
+ RISC_OS,
+
+ ISO8859_1_no_C1,
+ ISO8859_15_no_C1,
+ Amiga_no_C1,
+
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+ Locale, // CP_ACP on Windows, current C locale otherwise; only exists when MPT_ENABLE_CHARSET_LOCALE is defined
+#endif // MPT_ENABLE_CHARSET_LOCALE
+
+};
+
+
+// Fixed charsets assumed for various external text sources and sinks. Charset::Locale only exists when MPT_ENABLE_CHARSET_LOCALE is defined, hence the UTF8 alternatives below.
+// Charset of text originating from source code / the preprocessor (i.e. # token)
+inline constexpr Charset CharsetSource = Charset::ASCII;
+
+// Charset used for debug log files
+inline constexpr Charset CharsetLogfile = Charset::UTF8;
+
+// Charset for std::clog / std::cout / std::cerr: Locale only for MODPLUG_TRACKER builds on Windows with locale charset support, UTF8 otherwise
+#if defined(MODPLUG_TRACKER) && MPT_OS_WINDOWS && defined(MPT_ENABLE_CHARSET_LOCALE)
+inline constexpr Charset CharsetStdIO = Charset::Locale;
+#else
+inline constexpr Charset CharsetStdIO = Charset::UTF8;
+#endif
+
+// Charset assumed for getenv: Locale if available, UTF8 otherwise
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+inline constexpr Charset CharsetEnvironment = Charset::Locale;
+#else
+inline constexpr Charset CharsetEnvironment = Charset::UTF8;
+#endif
+
+// Charset assumed for std::exception::what(): Locale if available, UTF8 otherwise
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+inline constexpr Charset CharsetException = Charset::Locale;
+#else
+inline constexpr Charset CharsetException = Charset::UTF8;
+#endif
+
+
+
+// Checks whether the std::string represents a UTF-8 string.
+// This is currently implemented by converting to std::wstring and back, assuming UTF-8 both ways,
+// and comparing the result to the original string.
+// Caveats:
+// - can give false negatives because of possible Unicode normalization during conversion
+// - can give false positives if the 8-bit encoding contains high-ASCII bytes only in valid UTF-8 groups
+// - slow because of the double conversion
+bool IsUTF8(const std::string &str);
+
+
+
+#if MPT_WSTRING_CONVERT
+// Convert to a wide character string.
+// The wide encoding is UTF-16 or UTF-32, based on sizeof(wchar_t).
+// If str does not contain any invalid characters, this conversion is lossless.
+// Invalid source bytes will be replaced by some replacement character or string. The pointer overloads treat a null pointer as an empty string.
+inline std::wstring ToWide(const std::wstring &str) { return str; }
+inline std::wstring ToWide(const wchar_t * str) { return (str ? std::wstring(str) : std::wstring()); }
+std::wstring ToWide(Charset from, const std::string &str);
+inline std::wstring ToWide(Charset from, const char * str) { return ToWide(from, str ? std::string(str) : std::string()); }
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+std::wstring ToWide(const mpt::lstring &str);
+#endif // MPT_ENABLE_CHARSET_LOCALE
+#endif // MPT_WSTRING_CONVERT
+
+// Convert to a string encoded in the 'to'-specified character set.
+// If str does not contain any invalid characters,
+// this conversion will be lossless if, and only if,
+// 'to' is UTF8.
+// Invalid source bytes or characters that are not representable in the
+// destination charset will be replaced by some replacement character or string. The pointer overloads treat a null pointer as an empty string.
+#if MPT_WSTRING_CONVERT
+std::string ToCharset(Charset to, const std::wstring &str);
+inline std::string ToCharset(Charset to, const wchar_t * str) { return ToCharset(to, str ? std::wstring(str) : std::wstring()); }
+#endif // MPT_WSTRING_CONVERT
+std::string ToCharset(Charset to, Charset from, const std::string &str);
+inline std::string ToCharset(Charset to, Charset from, const char * str) { return ToCharset(to, from, str ? std::string(str) : std::string()); }
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+std::string ToCharset(Charset to, const mpt::lstring &str);
+#endif // MPT_ENABLE_CHARSET_LOCALE
+// Convert to mpt::lstring; only declared when MPT_ENABLE_CHARSET_LOCALE is defined. The pointer overloads treat a null pointer as an empty string. NOTE(review): mpt::lstring is presumably encoded in Charset::Locale - confirm.
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+#if MPT_WSTRING_CONVERT
+mpt::lstring ToLocale(const std::wstring &str);
+inline mpt::lstring ToLocale(const wchar_t * str) { return ToLocale(str ? std::wstring(str): std::wstring()); }
+#endif // MPT_WSTRING_CONVERT
+mpt::lstring ToLocale(Charset from, const std::string &str);
+inline mpt::lstring ToLocale(Charset from, const char * str) { return ToLocale(from, str ? std::string(str): std::string()); }
+inline mpt::lstring ToLocale(const mpt::lstring &str) { return str; } // identity overload
+#endif // MPT_ENABLE_CHARSET_LOCALE
+// Convert to mpt::winstring; only declared when MPT_OS_WINDOWS is non-zero. The pointer overloads treat a null pointer as an empty string. NOTE(review): mpt::winstring is presumably the string type matching the Windows API character type - confirm.
+#if MPT_OS_WINDOWS
+#if MPT_WSTRING_CONVERT
+mpt::winstring ToWin(const std::wstring &str);
+inline mpt::winstring ToWin(const wchar_t * str) { return ToWin(str ? std::wstring(str): std::wstring()); }
+#endif // MPT_WSTRING_CONVERT
+mpt::winstring ToWin(Charset from, const std::string &str);
+inline mpt::winstring ToWin(Charset from, const char * str) { return ToWin(from, str ? std::string(str): std::string()); }
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+mpt::winstring ToWin(const mpt::lstring &str);
+#endif // MPT_ENABLE_CHARSET_LOCALE
+#endif // MPT_OS_WINDOWS
+
+
+#if defined(MPT_WITH_MFC)
+#if !(MPT_WSTRING_CONVERT)
+#error "MFC depends on MPT_WSTRING_CONVERT)"
+#endif
+
+// Convert to a MFC CString. The CString encoding depends on UNICODE.
+// This should also be used when converting to TCHAR strings.
+// If UNICODE is defined, this is a completely lossless operation.
+inline CString ToCString(const CString &str) { return str; }
+CString ToCString(const std::wstring &str);
+inline CString ToCString(const wchar_t * str) { return ToCString(str ? std::wstring(str) : std::wstring()); }
+CString ToCString(Charset from, const std::string &str);
+inline CString ToCString(Charset from, const char * str) { return ToCString(from, str ? std::string(str) : std::string()); }
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+CString ToCString(const mpt::lstring &str);
+mpt::lstring ToLocale(const CString &str);
+#endif // MPT_ENABLE_CHARSET_LOCALE
+#if MPT_OS_WINDOWS
+mpt::winstring ToWin(const CString &str);
+#endif // MPT_OS_WINDOWS
+
+// Convert from a MFC CString. The CString encoding depends on UNICODE.
+// This should also be used when converting from TCHAR strings.
+// If UNICODE is defined, this is a completely lossless operation.
+std::wstring ToWide(const CString &str);
+std::string ToCharset(Charset to, const CString &str);
+
+#endif // MPT_WITH_MFC
+
+
+// Short aliases for MPT_UCHAR, MPT_ULITERAL and MPT_USTRING (not defined in this header). NOTE(review): presumably these yield a character, a string literal and an mpt::ustring in the mpt::ustring encoding - confirm.
+#define UC_(x) MPT_UCHAR(x)
+#define UL_(x) MPT_ULITERAL(x)
+#define U_(x) MPT_USTRING(x)
+
+
+
+#if MPT_USTRING_MODE_WIDE
+#if !(MPT_WSTRING_CONVERT)
+#error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
+#endif
+inline mpt::ustring ToUnicode(const std::wstring &str) { return str; }
+inline mpt::ustring ToUnicode(const wchar_t * str) { return (str ? std::wstring(str) : std::wstring()); }
+inline mpt::ustring ToUnicode(Charset from, const std::string &str) { return ToWide(from, str); }
+inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+inline mpt::ustring ToUnicode(const mpt::lstring &str) { return ToWide(str); }
+#endif // MPT_ENABLE_CHARSET_LOCALE
+#if defined(MPT_WITH_MFC)
+inline mpt::ustring ToUnicode(const CString &str) { return ToWide(str); }
+#endif // MFC
+#else // !MPT_USTRING_MODE_WIDE
+inline mpt::ustring ToUnicode(const mpt::ustring &str) { return str; }
+#if MPT_WSTRING_CONVERT
+mpt::ustring ToUnicode(const std::wstring &str);
+inline mpt::ustring ToUnicode(const wchar_t * str) { return ToUnicode(str ? std::wstring(str) : std::wstring()); }
+#endif
+mpt::ustring ToUnicode(Charset from, const std::string &str);
+inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+mpt::ustring ToUnicode(const mpt::lstring &str);
+#endif // MPT_ENABLE_CHARSET_LOCALE
+#if defined(MPT_WITH_MFC)
+mpt::ustring ToUnicode(const CString &str);
+#endif // MPT_WITH_MFC
+#endif // MPT_USTRING_MODE_WIDE
+
+#if MPT_USTRING_MODE_WIDE
+#if !(MPT_WSTRING_CONVERT)
+#error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
+#endif
+// nothing, std::wstring overloads will catch all stuff
+#else // !MPT_USTRING_MODE_WIDE
+#if MPT_WSTRING_CONVERT
+std::wstring ToWide(const mpt::ustring &str);
+#endif
+std::string ToCharset(Charset to, const mpt::ustring &str);
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+mpt::lstring ToLocale(const mpt::ustring &str);
+#endif // MPT_ENABLE_CHARSET_LOCALE
+#if MPT_OS_WINDOWS
+mpt::winstring ToWin(const mpt::ustring &str);
+#endif // MPT_OS_WINDOWS
+#if defined(MPT_WITH_MFC)
+CString ToCString(const mpt::ustring &str);
+#endif // MPT_WITH_MFC
+#endif // MPT_USTRING_MODE_WIDE
+
+
+
+
+
+// The MPT_UTF8 macro allows specifying UTF-8 encoded char arrays.
+// The resulting type is mpt::ustring and the construction might require runtime translation,
+// i.e. it is NOT generally available at compile time.
+// Use explicit UTF-8 byte escapes,
+// i.e. U+00FC (LATIN SMALL LETTER U WITH DIAERESIS) would be written as "\xC3\xBC".
+#define MPT_UTF8(x) mpt::ToUnicode(mpt::Charset::UTF8, x)
+
+
+
+
+// Converts str to mpt::ustring given a numeric codepage and a fallback charset. NOTE(review): presumably codepage is a Windows code page identifier and fallback is used when that code page cannot be handled - confirm against the implementation.
+mpt::ustring ToUnicode(uint16 codepage, mpt::Charset fallback, const std::string &str);
+
+
+
+
+// ASCII case conversion for a single char or a whole std::string (string overloads take the argument by value and return a std::string). NOTE(review): presumably characters outside 'A'-'Z' / 'a'-'z' pass through unchanged - confirm.
+char ToLowerCaseAscii(char c);
+char ToUpperCaseAscii(char c);
+std::string ToLowerCaseAscii(std::string s);
+std::string ToUpperCaseAscii(std::string s);
+// ASCII case-insensitive comparison. NOTE(review): presumably returns <0 / 0 / >0 like strcmp, with n limiting the number of characters compared in the pointer overload - confirm.
+int CompareNoCaseAscii(const char *a, const char *b, std::size_t n);
+int CompareNoCaseAscii(std::string_view a, std::string_view b);
+int CompareNoCaseAscii(const std::string &a, const std::string &b);
+
+
+#if defined(MODPLUG_TRACKER)
+// Case conversion for mpt::ustring; only declared in MODPLUG_TRACKER builds. The input is taken by const reference and the result is returned as a new string.
+mpt::ustring ToLowerCase(const mpt::ustring &s);
+mpt::ustring ToUpperCase(const mpt::ustring &s);
+
+#endif // MODPLUG_TRACKER
+
+
+
+
+
+} // namespace mpt
+
+
+
+
+
+// The AnyString types are meant to be used as function argument types only,
+// and only during the transition phase to all-unicode strings in the whole codebase.
+// Using an AnyString type as function argument avoids the need to overload a function for all the
+// different string types that we currently have.
+// Warning: These types will silently do charset conversions. Only use them when this can be tolerated.
+
+// BasicAnyString is convertible to mpt::ustring (it publicly derives from it) and constructible from any string at all. charset: charset assumed for 8-bit strings; tryUTF8: if charset is not UTF8, prefer UTF8 whenever mpt::IsUTF8() accepts the input.
+template <mpt::Charset charset = mpt::Charset::UTF8, bool tryUTF8 = true>
+class BasicAnyString : public mpt::ustring
+{
+
+private:
+ // Converts an 8-bit string to mpt::ustring: always as UTF8 if charset is UTF8; otherwise as UTF8 if tryUTF8 is set and mpt::IsUTF8(str) is true, else as charset.
+ static mpt::ustring From8bit(const std::string &str)
+ {
+ if constexpr(charset == mpt::Charset::UTF8)
+ {
+ return mpt::ToUnicode(mpt::Charset::UTF8, str);
+ } else
+ {
+ // automatic UTF8 detection (compiled in only if tryUTF8 is set)
+ if constexpr(tryUTF8)
+ {
+ if(mpt::IsUTF8(str))
+ {
+ return mpt::ToUnicode(mpt::Charset::UTF8, str);
+ } else
+ {
+ return mpt::ToUnicode(charset, str);
+ }
+ } else
+ {
+ return mpt::ToUnicode(charset, str);
+ }
+ }
+ }
+
+public:
+
+ // 8 bit: converted via From8bit(); a null pointer is treated as an empty string
+ BasicAnyString(const char *str) : mpt::ustring(From8bit(str ? str : std::string())) { }
+ BasicAnyString(const std::string str) : mpt::ustring(From8bit(str)) { } // NOTE(review): taken by value, not by const reference - presumably so that this overload is still preferred over the forwarding-reference fallback below for non-const lvalues; confirm before changing
+
+ // locale (NOTE(review): by-value parameter, presumably for the same overload-resolution reason as the std::string constructor - confirm)
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+ BasicAnyString(const mpt::lstring str) : mpt::ustring(mpt::ToUnicode(str)) { }
+#endif // MPT_ENABLE_CHARSET_LOCALE
+
+ // unicode: mpt::ustring is copied or moved as-is; wide strings go through mpt::ToUnicode(), a null wchar_t pointer yields an empty string
+ BasicAnyString(const mpt::ustring &str) : mpt::ustring(str) { }
+ BasicAnyString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
+#if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
+ BasicAnyString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
+#endif
+#if MPT_WSTRING_CONVERT
+ BasicAnyString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
+#endif
+
+ // mfc
+#if defined(MPT_WITH_MFC)
+ BasicAnyString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
+#endif // MPT_WITH_MFC
+
+ // fallback for custom string types: the argument is passed to the single-argument mpt::ToUnicode() overload set
+ template <typename Tstring> BasicAnyString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
+ template <typename Tstring> BasicAnyString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }
+
+};
+
+// AnyUnicodeString is convertible to mpt::ustring (it publicly derives from it) and constructible from any unicode string. Unlike BasicAnyString, it declares no constructors for char-based 8-bit strings.
+class AnyUnicodeString : public mpt::ustring
+{
+
+public:
+
+ // locale
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+ AnyUnicodeString(const mpt::lstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
+#endif // MPT_ENABLE_CHARSET_LOCALE
+
+ // unicode: mpt::ustring is copied or moved as-is; wide strings go through mpt::ToUnicode(), a null wchar_t pointer yields an empty string
+ AnyUnicodeString(const mpt::ustring &str) : mpt::ustring(str) { }
+ AnyUnicodeString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
+#if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
+ AnyUnicodeString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
+#endif
+#if MPT_WSTRING_CONVERT
+ AnyUnicodeString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
+#endif
+
+ // mfc
+#if defined(MPT_WITH_MFC)
+ AnyUnicodeString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
+#endif // MPT_WITH_MFC
+
+ // fallback for custom string types: the argument is passed to the single-argument mpt::ToUnicode() overload set
+ template <typename Tstring> AnyUnicodeString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
+ template <typename Tstring> AnyUnicodeString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }
+
+};
+
+// AnyString
+// Try to do the smartest auto-magic we can do: char-based strings are tried as UTF8 first, then interpreted as locale if available, else as Windows-1252 on Windows and ISO 8859-1 elsewhere.
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+using AnyString = BasicAnyString<mpt::Charset::Locale, true>;
+#elif MPT_OS_WINDOWS
+using AnyString = BasicAnyString<mpt::Charset::Windows1252, true>;
+#else
+using AnyString = BasicAnyString<mpt::Charset::ISO8859_1, true>;
+#endif
+
+// AnyStringLocale
+// char-based strings are assumed to be in locale encoding (UTF8 if no locale charset is compiled in); no UTF8 auto-detection.
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+using AnyStringLocale = BasicAnyString<mpt::Charset::Locale, false>;
+#else
+using AnyStringLocale = BasicAnyString<mpt::Charset::UTF8, false>;
+#endif
+
+// AnyStringUTF8orLocale
+// char-based strings are tried in UTF8 first, if this fails, locale is used. Without MPT_ENABLE_CHARSET_LOCALE, they are always treated as UTF8.
+#if defined(MPT_ENABLE_CHARSET_LOCALE)
+using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::Locale, true>;
+#else
+using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::UTF8, false>;
+#endif
+
+// AnyStringUTF8
+// char-based strings are assumed to be in UTF8.
+using AnyStringUTF8 = BasicAnyString<mpt::Charset::UTF8, false>;
+
+
+
+OPENMPT_NAMESPACE_END