//
// C++ Implementation: btstringmgr
//
// Description: 
//
//
// Author: The BibleTime team <info@bibletime.info>, (C) 2004
//
// Copyright: See COPYING file that comes with this distribution
//
//

#include <CoreFoundation/CoreFoundation.h>
#include "msstringmgr.h"

//System includes
#include <ctype.h>

char *MSStringMgr::upperUTF8(char *text, unsigned int max) const {
    
	if (isUtf8(text)) {
		
        const int maxlen = (max > 0) ? max : strlen(text);
        
        CFStringRef cfString = CFStringCreateWithCString(NULL, text, kCFStringEncodingUTF8);
        //CFStringCreateWithBytes(NULL, (UInt8*)text, maxlen, kCFStringEncodingUTF8, false);
		if(cfString) {
            CFMutableStringRef mstr = CFStringCreateMutableCopy(NULL, 0, cfString);
            CFStringUppercase(mstr, NULL);
            
            CFStringGetBytes (
                              mstr,
                              CFRangeMake(0, CFStringGetLength(mstr)),
                              kCFStringEncodingUTF8,
                              0,
                              false,
                              (UInt8*)text,
                              maxlen,
                              NULL
                              );
            
            CFRelease(cfString);
            CFRelease(mstr);
        } else {
            return upperLatin1(text, max);
        }
        
		return text;
	} else {
        return upperLatin1(text, max);
	}
    
	return text;
}

// Upper-cases a NUL-terminated single-byte string in place using toupper().
//
// text  buffer to convert; a NULL pointer is returned unchanged.
// max   when non-zero, at most max bytes are converted; 0 means
//       "convert up to the terminating NUL".
//
// Returns text.
char *MSStringMgr::upperLatin1(char *text, unsigned int max) const {
	if (!text) {
		return text;
	}

	const bool limited = (max > 0);
	unsigned int remaining = max;

	for (char *cursor = text; *cursor && (!limited || remaining-- > 0); ++cursor) {
		// toupper() requires a value representable as unsigned char;
		// passing a negative plain char is undefined behaviour.
		*cursor = (char)toupper((unsigned char)*cursor);
	}

	return text;
}

// Reports whether this string manager handles Unicode; always true here.
bool MSStringMgr::supportsUnicode() const {
	const bool unicodeCapable = true;
	return unicodeCapable;
}

// Heuristically checks whether a NUL-terminated buffer contains UTF-8 text.
//
// Returns true only if at least one multi-byte sequence was seen and no
// byte violated the rules below; pure 7-bit input therefore yields false.
// A NULL buffer yields false.
//
// Rules applied:
//  - ASCII bytes must be classified as plain text (T) in text_chars.
//  - A continuation byte (10xxxxxx) may not start a sequence.
//  - A lead byte announces 1 to 5 continuation bytes; each must be 10xxxxxx.
//  - If the string ends in the middle of a sequence, scanning stops and the
//    result reflects only the sequences completed before that point.
bool MSStringMgr::isUtf8(char *buf) const {
  if (!buf)
    return false;

  unsigned char c;
  bool gotone = false;

#define F 0   /* character never appears in text */
#define T 1   /* character appears in plain ASCII text */
#define I 2   /* character appears in ISO-8859 text */
#define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */

  /* Only the first 128 entries are consulted below (ASCII bytes). */
  static const unsigned char text_chars[256] = {
        /*                  BEL BS HT LF    FF CR    */
        F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
        /*                              ESC          */
        F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
        /*            NEL                            */
        X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
  };

  for (int i = 0; (c = (unsigned char)buf[i]) != 0; i++) {
    if ((c & 0x80) == 0) {        /* 0xxxxxxx is plain ASCII */
      /*
       * Even if the whole string is valid UTF-8 sequences,
       * still reject it if it uses weird control characters.
       */
      if (text_chars[c] != T)
        return false;

    } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
      return false;
    } else {                      /* 11xxxxxx begins UTF-8 */
      int following;

      if ((c & 0x20) == 0) {             /* 110xxxxx */
        following = 1;
      } else if ((c & 0x10) == 0) {      /* 1110xxxx */
        following = 2;
      } else if ((c & 0x08) == 0) {      /* 11110xxx */
        following = 3;
      } else if ((c & 0x04) == 0) {      /* 111110xx */
        following = 4;
      } else if ((c & 0x02) == 0) {      /* 1111110x */
        following = 5;
      } else {
        return false;
      }

      for (int n = 0; n < following; n++) {
        i++;
        c = (unsigned char)buf[i];

        /* String ended inside a sequence: stop scanning here. */
        if (c == 0)
          return gotone;

        /* Every continuation byte must look like 10xxxxxx. */
        if ((c & 0x80) == 0 || (c & 0x40))
          return false;
      }
      gotone = true;
    }
  }

  return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
}

#undef F
#undef T
#undef I
#undef X
