aboutsummaryrefslogtreecommitdiff
path: root/Src/external_dependencies/openmpt-trunk/common/mptString.h
blob: c308eef94625cc24e49bf7bfc34f495743c1d177 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/*
 * mptString.h
 * ----------
 * Purpose: Small string-related utilities, number and message formatting.
 * Notes  : Currently none.
 * Authors: OpenMPT Devs
 * The OpenMPT source code is released under the BSD license. Read LICENSE for more details.
 */


#pragma once

#include "openmpt/all/BuildSettings.hpp"

#include "mpt/base/alloc.hpp"
#include "mpt/base/span.hpp"
#include "mpt/string/types.hpp"
#include "mpt/string/utility.hpp"

#include "mptBaseTypes.h"

#include <algorithm>
#include <limits>
#include <string>
#include <string_view>

#include <cstring>



OPENMPT_NAMESPACE_BEGIN


namespace mpt
{



namespace String
{


template <typename Tstring, typename Tstring2, typename Tstring3>
inline Tstring Replace(Tstring str, const Tstring2 &oldStr, const Tstring3 &newStr)
{
	return mpt::replace(str, oldStr, newStr);
}


} // namespace String


enum class Charset {

	UTF8,

	ASCII, // strictly 7-bit ASCII

	ISO8859_1,
	ISO8859_15,

	CP850,
	CP437,
	CP437AMS,
	CP437AMS2,

	Windows1252,

	Amiga,
	RISC_OS,

	ISO8859_1_no_C1,
	ISO8859_15_no_C1,
	Amiga_no_C1,

#if defined(MPT_ENABLE_CHARSET_LOCALE)
	Locale, // CP_ACP on windows, current C locale otherwise
#endif // MPT_ENABLE_CHARSET_LOCALE

};



// source code / preprocessor (i.e. # token)
inline constexpr Charset CharsetSource = Charset::ASCII;

// debug log files
inline constexpr Charset CharsetLogfile = Charset::UTF8;

// std::clog / std::cout / std::cerr
#if defined(MODPLUG_TRACKER) && MPT_OS_WINDOWS && defined(MPT_ENABLE_CHARSET_LOCALE)
inline constexpr Charset CharsetStdIO = Charset::Locale;
#else
inline constexpr Charset CharsetStdIO = Charset::UTF8;
#endif

// getenv
#if defined(MPT_ENABLE_CHARSET_LOCALE)
inline constexpr Charset CharsetEnvironment = Charset::Locale;
#else
inline constexpr Charset CharsetEnvironment = Charset::UTF8;
#endif

// std::exception::what()
#if defined(MPT_ENABLE_CHARSET_LOCALE)
inline constexpr Charset CharsetException = Charset::Locale;
#else
inline constexpr Charset CharsetException = Charset::UTF8;
#endif



// Checks if the std::string represents an UTF8 string.
// This is currently implemented as converting to std::wstring and back assuming UTF8 both ways,
// and comparing the result to the original string.
// Caveats:
//  - can give false negatives because of possible unicode normalization during conversion
//  - can give false positives if the 8bit encoding contains high-ascii only in valid utf8 groups
//  - slow because of double conversion
bool IsUTF8(const std::string &str);



#if MPT_WSTRING_CONVERT
// Convert to a wide character string.
// The wide encoding is UTF-16 or UTF-32, based on sizeof(wchar_t).
// If str does not contain any invalid characters, this conversion is lossless.
// Invalid source bytes will be replaced by some replacement character or string.
inline std::wstring ToWide(const std::wstring &str) { return str; }
inline std::wstring ToWide(const wchar_t * str) { return (str ? std::wstring(str) : std::wstring()); }
std::wstring ToWide(Charset from, const std::string &str);
inline std::wstring ToWide(Charset from, const char * str) { return ToWide(from, str ? std::string(str) : std::string()); }
#if defined(MPT_ENABLE_CHARSET_LOCALE)
std::wstring ToWide(const mpt::lstring &str);
#endif // MPT_ENABLE_CHARSET_LOCALE
#endif

// Convert to a string encoded in the 'to'-specified character set.
// If str does not contain any invalid characters,
// this conversion will be lossless iff, and only iff,
// 'to' is UTF8.
// Invalid source bytes or characters that are not representable in the
// destination charset will be replaced by some replacement character or string.
#if MPT_WSTRING_CONVERT
std::string ToCharset(Charset to, const std::wstring &str);
inline std::string ToCharset(Charset to, const wchar_t * str) { return ToCharset(to, str ? std::wstring(str) : std::wstring()); }
#endif
std::string ToCharset(Charset to, Charset from, const std::string &str);
inline std::string ToCharset(Charset to, Charset from, const char * str) { return ToCharset(to, from, str ? std::string(str) : std::string()); }
#if defined(MPT_ENABLE_CHARSET_LOCALE)
std::string ToCharset(Charset to, const mpt::lstring &str);
#endif // MPT_ENABLE_CHARSET_LOCALE

#if defined(MPT_ENABLE_CHARSET_LOCALE)
#if MPT_WSTRING_CONVERT
mpt::lstring ToLocale(const std::wstring &str);
inline mpt::lstring ToLocale(const wchar_t * str) { return ToLocale(str ? std::wstring(str): std::wstring()); }
#endif
mpt::lstring ToLocale(Charset from, const std::string &str);
inline mpt::lstring ToLocale(Charset from, const char * str) { return ToLocale(from, str ? std::string(str): std::string()); }
inline mpt::lstring ToLocale(const mpt::lstring &str) { return str; }
#endif // MPT_ENABLE_CHARSET_LOCALE

#if MPT_OS_WINDOWS
#if MPT_WSTRING_CONVERT
mpt::winstring ToWin(const std::wstring &str);
inline mpt::winstring ToWin(const wchar_t * str) { return ToWin(str ? std::wstring(str): std::wstring()); }
#endif
mpt::winstring ToWin(Charset from, const std::string &str);
inline mpt::winstring ToWin(Charset from, const char * str) { return ToWin(from, str ? std::string(str): std::string()); }
#if defined(MPT_ENABLE_CHARSET_LOCALE)
mpt::winstring ToWin(const mpt::lstring &str);
#endif // MPT_ENABLE_CHARSET_LOCALE
#endif // MPT_OS_WINDOWS


#if defined(MPT_WITH_MFC)
#if !(MPT_WSTRING_CONVERT)
#error "MFC depends on MPT_WSTRING_CONVERT)"
#endif

// Convert to a MFC CString. The CString encoding depends on UNICODE.
// This should also be used when converting to TCHAR strings.
// If UNICODE is defined, this is a completely lossless operation.
inline CString ToCString(const CString &str) { return str; }
CString ToCString(const std::wstring &str);
inline CString ToCString(const wchar_t * str) { return ToCString(str ? std::wstring(str) : std::wstring()); }
CString ToCString(Charset from, const std::string &str);
inline CString ToCString(Charset from, const char * str) { return ToCString(from, str ? std::string(str) : std::string()); }
#if defined(MPT_ENABLE_CHARSET_LOCALE)
CString ToCString(const mpt::lstring &str);
mpt::lstring ToLocale(const CString &str);
#endif // MPT_ENABLE_CHARSET_LOCALE
#if MPT_OS_WINDOWS
mpt::winstring ToWin(const CString &str);
#endif // MPT_OS_WINDOWS

// Convert from a MFC CString. The CString encoding depends on UNICODE.
// This should also be used when converting from TCHAR strings.
// If UNICODE is defined, this is a completely lossless operation.
std::wstring ToWide(const CString &str);
std::string ToCharset(Charset to, const CString &str);

#endif // MPT_WITH_MFC



#define UC_(x)           MPT_UCHAR(x)
#define UL_(x)           MPT_ULITERAL(x)
#define U_(x)            MPT_USTRING(x)



#if MPT_USTRING_MODE_WIDE
#if !(MPT_WSTRING_CONVERT)
#error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
#endif
inline mpt::ustring ToUnicode(const std::wstring &str) { return str; }
inline mpt::ustring ToUnicode(const wchar_t * str) { return (str ? std::wstring(str) : std::wstring()); }
inline mpt::ustring ToUnicode(Charset from, const std::string &str) { return ToWide(from, str); }
inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
#if defined(MPT_ENABLE_CHARSET_LOCALE)
inline mpt::ustring ToUnicode(const mpt::lstring &str) { return ToWide(str); }
#endif // MPT_ENABLE_CHARSET_LOCALE
#if defined(MPT_WITH_MFC)
inline mpt::ustring ToUnicode(const CString &str) { return ToWide(str); }
#endif // MFC
#else // !MPT_USTRING_MODE_WIDE
inline mpt::ustring ToUnicode(const mpt::ustring &str) { return str; }
#if MPT_WSTRING_CONVERT
mpt::ustring ToUnicode(const std::wstring &str);
inline mpt::ustring ToUnicode(const wchar_t * str) { return ToUnicode(str ? std::wstring(str) : std::wstring()); }
#endif
mpt::ustring ToUnicode(Charset from, const std::string &str);
inline mpt::ustring ToUnicode(Charset from, const char * str) { return ToUnicode(from, str ? std::string(str) : std::string()); }
#if defined(MPT_ENABLE_CHARSET_LOCALE)
mpt::ustring ToUnicode(const mpt::lstring &str);
#endif // MPT_ENABLE_CHARSET_LOCALE
#if defined(MPT_WITH_MFC)
mpt::ustring ToUnicode(const CString &str);
#endif // MPT_WITH_MFC
#endif // MPT_USTRING_MODE_WIDE

#if MPT_USTRING_MODE_WIDE
#if !(MPT_WSTRING_CONVERT)
#error "MPT_USTRING_MODE_WIDE depends on MPT_WSTRING_CONVERT)"
#endif
// nothing, std::wstring overloads will catch all stuff
#else // !MPT_USTRING_MODE_WIDE
#if MPT_WSTRING_CONVERT
std::wstring ToWide(const mpt::ustring &str);
#endif
std::string ToCharset(Charset to, const mpt::ustring &str);
#if defined(MPT_ENABLE_CHARSET_LOCALE)
mpt::lstring ToLocale(const mpt::ustring &str);
#endif // MPT_ENABLE_CHARSET_LOCALE
#if MPT_OS_WINDOWS
mpt::winstring ToWin(const mpt::ustring &str);
#endif // MPT_OS_WINDOWS
#if defined(MPT_WITH_MFC)
CString ToCString(const mpt::ustring &str);
#endif // MPT_WITH_MFC
#endif // MPT_USTRING_MODE_WIDE





// The MPT_UTF8 allows specifying UTF8 char arrays.
// The resulting type is mpt::ustring and the construction might require runtime translation,
// i.e. it is NOT generally available at compile time.
// Use explicit UTF8 encoding,
// i.e. U+00FC (LATIN SMALL LETTER U WITH DIAERESIS) would be written as "\xC3\xBC".
#define MPT_UTF8(x) mpt::ToUnicode(mpt::Charset::UTF8, x)





mpt::ustring ToUnicode(uint16 codepage, mpt::Charset fallback, const std::string &str);





char ToLowerCaseAscii(char c);
char ToUpperCaseAscii(char c);
std::string ToLowerCaseAscii(std::string s);
std::string ToUpperCaseAscii(std::string s);

int CompareNoCaseAscii(const char *a, const char *b, std::size_t n);
int CompareNoCaseAscii(std::string_view a, std::string_view b);
int CompareNoCaseAscii(const std::string &a, const std::string &b);


#if defined(MODPLUG_TRACKER)

mpt::ustring ToLowerCase(const mpt::ustring &s);
mpt::ustring ToUpperCase(const mpt::ustring &s);

#endif // MODPLUG_TRACKER





} // namespace mpt





// The AnyString types are meant to be used as function argument types only,
// and only during the transition phase to all-unicode strings in the whole codebase.
// Using an AnyString type as function argument avoids the need to overload a function for all the
// different string types that we currently have.
// Warning: These types will silently do charset conversions. Only use them when this can be tolerated.

// BasicAnyString is convertable to mpt::ustring and constructable from any string at all.
template <mpt::Charset charset = mpt::Charset::UTF8, bool tryUTF8 = true>
class BasicAnyString : public mpt::ustring
{

private:
	
	static mpt::ustring From8bit(const std::string &str)
	{
		if constexpr(charset == mpt::Charset::UTF8)
		{
			return mpt::ToUnicode(mpt::Charset::UTF8, str);
		} else
		{
			// auto utf8 detection
			if constexpr(tryUTF8)
			{
				if(mpt::IsUTF8(str))
				{
					return mpt::ToUnicode(mpt::Charset::UTF8, str);
				} else
				{
					return mpt::ToUnicode(charset, str);
				}
			} else
			{
				return mpt::ToUnicode(charset, str);
			}
		}
	}

public:

	// 8 bit
	BasicAnyString(const char *str) : mpt::ustring(From8bit(str ? str : std::string())) { }
	BasicAnyString(const std::string str) : mpt::ustring(From8bit(str)) { }

	// locale
#if defined(MPT_ENABLE_CHARSET_LOCALE)
	BasicAnyString(const mpt::lstring str) : mpt::ustring(mpt::ToUnicode(str)) { }
#endif // MPT_ENABLE_CHARSET_LOCALE

	// unicode
	BasicAnyString(const mpt::ustring &str) : mpt::ustring(str) { }
	BasicAnyString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
#if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
	BasicAnyString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
#endif
#if MPT_WSTRING_CONVERT
	BasicAnyString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
#endif

	// mfc
#if defined(MPT_WITH_MFC)
	BasicAnyString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
#endif // MPT_WITH_MFC

	// fallback for custom string types
	template <typename Tstring> BasicAnyString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
	template <typename Tstring> BasicAnyString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }

};

// AnyUnicodeString is convertable to mpt::ustring and constructable from any unicode string,
class AnyUnicodeString : public mpt::ustring
{

public:

	// locale
#if defined(MPT_ENABLE_CHARSET_LOCALE)
	AnyUnicodeString(const mpt::lstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
#endif // MPT_ENABLE_CHARSET_LOCALE

	// unicode
	AnyUnicodeString(const mpt::ustring &str) : mpt::ustring(str) { }
	AnyUnicodeString(mpt::ustring &&str) : mpt::ustring(std::move(str)) { }
#if MPT_USTRING_MODE_UTF8 && MPT_WSTRING_CONVERT
	AnyUnicodeString(const std::wstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
#endif
#if MPT_WSTRING_CONVERT
	AnyUnicodeString(const wchar_t *str) : mpt::ustring(str ? mpt::ToUnicode(str) : mpt::ustring()) { }
#endif

	// mfc
#if defined(MPT_WITH_MFC)
	AnyUnicodeString(const CString &str) : mpt::ustring(mpt::ToUnicode(str)) { }
#endif // MPT_WITH_MFC

	// fallback for custom string types
	template <typename Tstring> AnyUnicodeString(const Tstring &str) : mpt::ustring(mpt::ToUnicode(str)) { }
	template <typename Tstring> AnyUnicodeString(Tstring &&str) : mpt::ustring(mpt::ToUnicode(std::forward<Tstring>(str))) { }

};

// AnyString
// Try to do the smartest auto-magic we can do.
#if defined(MPT_ENABLE_CHARSET_LOCALE)
using AnyString = BasicAnyString<mpt::Charset::Locale, true>;
#elif MPT_OS_WINDOWS
using AnyString = BasicAnyString<mpt::Charset::Windows1252, true>;
#else
using AnyString = BasicAnyString<mpt::Charset::ISO8859_1, true>;
#endif

// AnyStringLocale
// char-based strings are assumed to be in locale encoding.
#if defined(MPT_ENABLE_CHARSET_LOCALE)
using AnyStringLocale = BasicAnyString<mpt::Charset::Locale, false>;
#else
using AnyStringLocale = BasicAnyString<mpt::Charset::UTF8, false>;
#endif

// AnyStringUTF8orLocale
// char-based strings are tried in UTF8 first, if this fails, locale is used.
#if defined(MPT_ENABLE_CHARSET_LOCALE)
using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::Locale, true>;
#else
using AnyStringUTF8orLocale = BasicAnyString<mpt::Charset::UTF8, false>;
#endif

// AnyStringUTF8
// char-based strings are assumed to be in UTF8.
using AnyStringUTF8 = BasicAnyString<mpt::Charset::UTF8, false>;



OPENMPT_NAMESPACE_END