Microsoft royally screwed up Unicode on Windows by not supporting UTF-8, only UTF-16.
So there are three ways to get Unicode support in a program:
* Use the TCHAR macros. Macros everywhere! Ugly as hell. Text is 8-bit in an "ANSI" build and 16-bit in a "Unicode" build. You also need to write a "tchar.h" file for Linux builds that basically drops the _t prefixes from the function names.
* Use the Unicode functions directly.
* Make shims that convert text to and from UTF-8 around stuff that really needs Unicode support (displayed text and filenames).
If you want to change as little code as possible, I'd go with the Unicode shims.
An example of overriding fopen: (make this into a header file somewhere)
Code:
#pragma once
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <shellapi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * UTF-8 aware stand-in for fopen() on Windows.
 *
 * Converts Filename and Mode from UTF-8 to UTF-16 and opens the file with
 * _wfopen(), so paths containing characters outside the ANSI code page work.
 *
 * Filename  NUL-terminated UTF-8 path.
 * Mode      NUL-terminated fopen-style mode string (e.g. "rb").
 *
 * Returns the stream from _wfopen(), or NULL if either argument is NULL,
 * a conversion fails, memory cannot be allocated, or _wfopen() itself fails.
 * This function does not set errno for the conversion-failure case.
 *
 * The malloc casts are kept on purpose so the header also builds as C++.
 */
static inline FILE* my_fopen(const char *Filename, const char *Mode)
{
    FILE *file = NULL;
    wchar_t *filename_buffer = NULL;
    wchar_t *filemode_buffer = NULL;
    int filename_buffer_size;
    int filemode_buffer_size;

    if (Filename == NULL || Mode == NULL)
    {
        return NULL;
    }

    /*
     * First pass: with an output size of 0 the API only reports how many
     * wchar_t units are required. Because the input length is -1, that count
     * includes the terminating NUL.
     */
    filename_buffer_size = MultiByteToWideChar(CP_UTF8, 0, Filename, -1, NULL, 0);
    filemode_buffer_size = MultiByteToWideChar(CP_UTF8, 0, Mode, -1, NULL, 0);
    if (filename_buffer_size <= 0 || filemode_buffer_size <= 0)
    {
        return NULL;
    }

    filename_buffer = (wchar_t*)malloc(sizeof(wchar_t) * (size_t)filename_buffer_size);
    filemode_buffer = (wchar_t*)malloc(sizeof(wchar_t) * (size_t)filemode_buffer_size);

    /* Second pass: do the real conversion, and only open if every step succeeded. */
    if (filename_buffer != NULL && filemode_buffer != NULL
        && 0 != MultiByteToWideChar(CP_UTF8, 0, Filename, -1, filename_buffer, filename_buffer_size)
        && 0 != MultiByteToWideChar(CP_UTF8, 0, Mode, -1, filemode_buffer, filemode_buffer_size))
    {
        file = _wfopen(filename_buffer, filemode_buffer);
    }

    /* free(NULL) is a no-op, so this single cleanup path covers every failure above. */
    free(filename_buffer);
    free(filemode_buffer);
    return file;
}
/*
 * Build a UTF-8 encoded, NULL-terminated argv array from the process's
 * UTF-16 command line (GetCommandLineW + CommandLineToArgvW).
 *
 * Returns a malloc'd array of malloc'd strings; the element after the last
 * argument is NULL. The caller owns the result and may free() each string
 * and then the array itself.
 *
 * Returns NULL if the command line cannot be parsed, an argument cannot be
 * converted, or memory runs out. Partial results are released before
 * returning, so nothing leaks on failure.
 *
 * The malloc casts are kept on purpose so the header also builds as C++.
 */
static inline char** GetArgv(void)
{
    int nArgs = 0;
    int converted = 0;   /* number of argv slots that hold a valid string */
    int failed = 0;
    char **argv = NULL;
    wchar_t **wargv = CommandLineToArgvW(GetCommandLineW(), &nArgs);

    if (wargv == NULL)
    {
        return NULL;
    }

    argv = (char**)malloc(((size_t)nArgs + 1) * sizeof(char*));
    if (argv == NULL)
    {
        failed = 1;
    }

    while (!failed && converted < nArgs)
    {
        /* First call reports the byte count needed, terminating NUL included. */
        int bufferSize = WideCharToMultiByte(CP_UTF8, 0, wargv[converted], -1, NULL, 0, NULL, NULL);
        char *buffer = NULL;

        if (bufferSize > 0)
        {
            buffer = (char*)malloc((size_t)bufferSize);
        }

        if (buffer == NULL
            || 0 == WideCharToMultiByte(CP_UTF8, 0, wargv[converted], -1, buffer, bufferSize, NULL, NULL))
        {
            free(buffer);
            failed = 1;
        }
        else
        {
            argv[converted] = buffer;
            converted++;
        }
    }

    /* CommandLineToArgvW returns one contiguous block that is released with LocalFree. */
    LocalFree(wargv);

    if (failed)
    {
        /* Undo the partial work so the caller sees all-or-nothing. */
        if (argv != NULL)
        {
            while (converted > 0)
            {
                converted--;
                free(argv[converted]);
            }
            free(argv);
        }
        return NULL;
    }

    argv[nArgs] = NULL;
    return argv;
}
/*
 * Redirect every later call of the form fopen(name, mode) to my_fopen(), so
 * existing code picks up UTF-8 filename handling without being edited.
 * This is a function-like macro: only call-style uses are rewritten; a bare
 * `fopen` (for example, taking its address) is left untouched.
 * It must stay below <stdio.h> and below the definition of my_fopen;
 * otherwise the real fopen declaration would be rewritten as well.
 */
#define fopen(FileName,FileMode) my_fopen(FileName,FileMode)
After you include that, any call to fopen() on Windows will expect the filename to be UTF-8 encoded and will convert it to UTF-16 before opening the file, thus giving it Unicode support.
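Then, at the start of main(), you need to replace argv with the result of "GetArgv()" (for example: argv = GetArgv();), because the argv that Windows passes to main() is encoded in the ANSI code page, not UTF-8.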
The advantage to this method is that you change minimal code. The disadvantage to this method is that it only affects your code. Libraries still use ANSI functions to access files and display text, and won't work if you pass UTF-8 filenames or text to them.