[Unison-hackers] [unison-svn] r326 - in trunk/src: . ubase

vouillon@seas.upenn.edu vouillon at seas.upenn.edu
Mon May 4 14:48:29 EDT 2009


Author: vouillon
Date: 2009-05-04 14:48:23 -0400 (Mon, 04 May 2009)
New Revision: 326

Added:
   trunk/src/unicode.ml
   trunk/src/unicode.mli
   trunk/src/unicode_tables.ml
Modified:
   trunk/src/.depend
   trunk/src/Makefile.OCaml
   trunk/src/RECENTNEWS
   trunk/src/case.ml
   trunk/src/case.mli
   trunk/src/mkProjectInfo.ml
   trunk/src/name.ml
   trunk/src/name.mli
   trunk/src/path.ml
   trunk/src/pred.ml
   trunk/src/ubase/depend
   trunk/src/update.ml
Log:
* Updated list of bad Windows file names following the MSDN
  documentation (in particular, files with trailing dots are now
  rejected when synchronizing with a Windows machine)
* Experimental Unicode-aware case insensitive mode.  It is activated
  when the preference "unicode" is set to true and Unison is in
  case-insensitive mode.
* Bumped version number to reflect the newly added preference


Modified: trunk/src/.depend
===================================================================
--- trunk/src/.depend	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/.depend	2009-05-04 18:48:23 UTC (rev 326)
@@ -1,37 +1,52 @@
 abort.cmi: uutil.cmi 
+case.cmi: 
+checksum.cmi: 
+clroot.cmi: 
 common.cmi: uutil.cmi props.cmi path.cmi osx.cmi os.cmi name.cmi fspath.cmi \
     fileinfo.cmi 
 copy.cmi: uutil.cmi props.cmi path.cmi osx.cmi os.cmi lwt/lwt.cmi fspath.cmi \
     common.cmi 
+external.cmi: 
 fileinfo.cmi: props.cmi path.cmi osx.cmi fspath.cmi 
 files.cmi: uutil.cmi props.cmi path.cmi lwt/lwt_util.cmi lwt/lwt.cmi \
     common.cmi 
+fileutil.cmi: 
 fingerprint.cmi: uutil.cmi path.cmi fspath.cmi 
 fspath.cmi: path.cmi name.cmi 
 globals.cmi: ubase/prefs.cmi path.cmi lwt/lwt.cmi common.cmi 
+lock.cmi: 
+name.cmi: 
 os.cmi: props.cmi path.cmi name.cmi fspath.cmi fileinfo.cmi 
 osx.cmi: uutil.cmi ubase/prefs.cmi path.cmi fspath.cmi fingerprint.cmi 
 path.cmi: name.cmi 
+pred.cmi: 
 props.cmi: uutil.cmi ubase/prefs.cmi path.cmi osx.cmi fspath.cmi 
 recon.cmi: path.cmi common.cmi 
 remote.cmi: ubase/prefs.cmi lwt/lwt.cmi fspath.cmi common.cmi clroot.cmi 
 sortri.cmi: common.cmi 
 stasher.cmi: ubase/prefs.cmi path.cmi os.cmi fspath.cmi 
+strings.cmi: 
+terminal.cmi: 
+test.cmi: 
 transfer.cmi: uutil.cmi lwt/lwt.cmi 
 transport.cmi: uutil.cmi lwt/lwt.cmi common.cmi 
+tree.cmi: 
 uicommon.cmi: uutil.cmi ubase/prefs.cmi path.cmi lwt/lwt.cmi common.cmi 
+uigtk2.cmi: uicommon.cmi 
 uigtk.cmi: uicommon.cmi 
-uigtk2.cmi: uicommon.cmi 
+ui.cmi: 
 uitext.cmi: uicommon.cmi 
+unicode.cmi: 
 update.cmi: tree.cmi props.cmi path.cmi osx.cmi os.cmi name.cmi lwt/lwt.cmi \
     fspath.cmi fileinfo.cmi common.cmi 
+uutil.cmi: 
 xferhint.cmi: ubase/prefs.cmi path.cmi os.cmi fspath.cmi 
 abort.cmo: uutil.cmi ubase/util.cmi ubase/trace.cmi ubase/safelist.cmi \
     abort.cmi 
 abort.cmx: uutil.cmx ubase/util.cmx ubase/trace.cmx ubase/safelist.cmx \
     abort.cmi 
-case.cmo: ubase/prefs.cmi case.cmi 
-case.cmx: ubase/prefs.cmx case.cmi 
+case.cmo: ubase/util.cmi unicode.cmi ubase/rx.cmi ubase/prefs.cmi case.cmi 
+case.cmx: ubase/util.cmx unicode.cmx ubase/rx.cmx ubase/prefs.cmx case.cmi 
 checksum.cmo: checksum.cmi 
 checksum.cmx: checksum.cmi 
 clroot.cmo: ubase/util.cmi ubase/rx.cmi ubase/prefs.cmi clroot.cmi 
@@ -80,10 +95,10 @@
 globals.cmx: ubase/util.cmx ubase/trace.cmx ubase/safelist.cmx remote.cmx \
     ubase/prefs.cmx pred.cmx path.cmx os.cmx name.cmx lwt/lwt_util.cmx \
     lwt/lwt_unix.cmx lwt/lwt.cmx common.cmx clroot.cmx globals.cmi 
+linkgtk2.cmo: uigtk2.cmi main.cmo 
+linkgtk2.cmx: uigtk2.cmx main.cmx 
 linkgtk.cmo: uigtk.cmi main.cmo 
 linkgtk.cmx: uigtk.cmx main.cmx 
-linkgtk2.cmo: uigtk2.cmi main.cmo 
-linkgtk2.cmx: uigtk2.cmx main.cmx 
 linktext.cmo: uitext.cmi main.cmo 
 linktext.cmx: uitext.cmx main.cmx 
 linktk.cmo: main.cmo 
@@ -94,6 +109,8 @@
     ubase/safelist.cmi remote.cmi ubase/prefs.cmi os.cmi fspath.cmi 
 main.cmx: uutil.cmx ubase/util.cmx uitext.cmx uicommon.cmx strings.cmx \
     ubase/safelist.cmx remote.cmx ubase/prefs.cmx os.cmx fspath.cmx 
+mkProjectInfo.cmo: 
+mkProjectInfo.cmx: 
 name.cmo: ubase/util.cmi case.cmi name.cmi 
 name.cmx: ubase/util.cmx case.cmx name.cmi 
 os.cmo: uutil.cmi ubase/util.cmi ubase/safelist.cmi props.cmi ubase/prefs.cmi \
@@ -108,6 +125,8 @@
     fileutil.cmi case.cmi path.cmi 
 path.cmx: ubase/util.cmx ubase/safelist.cmx ubase/rx.cmx pred.cmx name.cmx \
     fileutil.cmx case.cmx path.cmi 
+pixmaps.cmo: 
+pixmaps.cmx: 
 pred.cmo: ubase/util.cmi ubase/safelist.cmi ubase/rx.cmi ubase/prefs.cmi \
     case.cmi pred.cmi 
 pred.cmx: ubase/util.cmx ubase/safelist.cmx ubase/rx.cmx ubase/prefs.cmx \
@@ -178,26 +197,26 @@
     props.cmx ubase/prefs.cmx path.cmx osx.cmx os.cmx name.cmx \
     lwt/lwt_unix.cmx lwt/lwt.cmx globals.cmx fspath.cmx files.cmx \
     fileinfo.cmx common.cmx clroot.cmx case.cmx uicommon.cmi 
-uigtk.cmo: uutil.cmi ubase/util.cmi update.cmi uitext.cmi uicommon.cmi \
+uigtk2.cmo: uutil.cmi ubase/util.cmi update.cmi uitext.cmi uicommon.cmi \
     transport.cmi ubase/trace.cmi strings.cmi sortri.cmi ubase/safelist.cmi \
     remote.cmi recon.cmi ubase/prefs.cmi pixmaps.cmo path.cmi os.cmi \
     lwt/lwt_util.cmi lwt/lwt_unix.cmi lwt/lwt.cmi globals.cmi fspath.cmi \
-    files.cmi common.cmi clroot.cmi uigtk.cmi 
-uigtk.cmx: uutil.cmx ubase/util.cmx update.cmx uitext.cmx uicommon.cmx \
+    files.cmi common.cmi clroot.cmi uigtk2.cmi 
+uigtk2.cmx: uutil.cmx ubase/util.cmx update.cmx uitext.cmx uicommon.cmx \
     transport.cmx ubase/trace.cmx strings.cmx sortri.cmx ubase/safelist.cmx \
     remote.cmx recon.cmx ubase/prefs.cmx pixmaps.cmx path.cmx os.cmx \
     lwt/lwt_util.cmx lwt/lwt_unix.cmx lwt/lwt.cmx globals.cmx fspath.cmx \
-    files.cmx common.cmx clroot.cmx uigtk.cmi 
-uigtk2.cmo: uutil.cmi ubase/util.cmi update.cmi uitext.cmi uicommon.cmi \
+    files.cmx common.cmx clroot.cmx uigtk2.cmi 
+uigtk.cmo: uutil.cmi ubase/util.cmi update.cmi uitext.cmi uicommon.cmi \
     transport.cmi ubase/trace.cmi strings.cmi sortri.cmi ubase/safelist.cmi \
     remote.cmi recon.cmi ubase/prefs.cmi pixmaps.cmo path.cmi os.cmi \
     lwt/lwt_util.cmi lwt/lwt_unix.cmi lwt/lwt.cmi globals.cmi fspath.cmi \
-    files.cmi common.cmi clroot.cmi uigtk2.cmi 
-uigtk2.cmx: uutil.cmx ubase/util.cmx update.cmx uitext.cmx uicommon.cmx \
+    files.cmi common.cmi clroot.cmi uigtk.cmi 
+uigtk.cmx: uutil.cmx ubase/util.cmx update.cmx uitext.cmx uicommon.cmx \
     transport.cmx ubase/trace.cmx strings.cmx sortri.cmx ubase/safelist.cmx \
     remote.cmx recon.cmx ubase/prefs.cmx pixmaps.cmx path.cmx os.cmx \
     lwt/lwt_util.cmx lwt/lwt_unix.cmx lwt/lwt.cmx globals.cmx fspath.cmx \
-    files.cmx common.cmx clroot.cmx uigtk2.cmi 
+    files.cmx common.cmx clroot.cmx uigtk.cmi 
 uimacbridge.cmo: xferhint.cmi uutil.cmi ubase/util.cmi update.cmi \
     uicommon.cmi transport.cmi ubase/trace.cmi terminal.cmi stasher.cmi \
     ubase/safelist.cmi remote.cmi recon.cmi ubase/prefs.cmi path.cmi os.cmi \
@@ -226,18 +245,22 @@
     ubase/trace.cmx ubase/safelist.cmx remote.cmx recon.cmx ubase/prefs.cmx \
     path.cmx lwt/lwt_util.cmx lwt/lwt_unix.cmx lwt/lwt.cmx globals.cmx \
     common.cmx uitext.cmi 
+unicode.cmo: unicode_tables.cmo unicode.cmi 
+unicode.cmx: unicode_tables.cmx unicode.cmi 
+unicode_tables.cmo: 
+unicode_tables.cmx: 
 update.cmo: xferhint.cmi uutil.cmi ubase/util.cmi tree.cmi ubase/trace.cmi \
-    stasher.cmi ubase/safelist.cmi ubase/rx.cmi remote.cmi props.cmi \
-    ubase/prefs.cmi pred.cmi path.cmi osx.cmi os.cmi name.cmi ubase/myMap.cmi \
+    stasher.cmi ubase/safelist.cmi remote.cmi props.cmi ubase/prefs.cmi \
+    pred.cmi path.cmi osx.cmi os.cmi name.cmi ubase/myMap.cmi \
     lwt/lwt_unix.cmi lwt/lwt.cmi lock.cmi globals.cmi fspath.cmi \
     fingerprint.cmi fileinfo.cmi external.cmi common.cmi update.cmi 
 update.cmx: xferhint.cmx uutil.cmx ubase/util.cmx tree.cmx ubase/trace.cmx \
-    stasher.cmx ubase/safelist.cmx ubase/rx.cmx remote.cmx props.cmx \
-    ubase/prefs.cmx pred.cmx path.cmx osx.cmx os.cmx name.cmx ubase/myMap.cmx \
+    stasher.cmx ubase/safelist.cmx remote.cmx props.cmx ubase/prefs.cmx \
+    pred.cmx path.cmx osx.cmx os.cmx name.cmx ubase/myMap.cmx \
     lwt/lwt_unix.cmx lwt/lwt.cmx lock.cmx globals.cmx fspath.cmx \
     fingerprint.cmx fileinfo.cmx external.cmx common.cmx update.cmi 
-uutil.cmo: ubase/util.cmi uutil.cmi 
-uutil.cmx: ubase/util.cmx uutil.cmi 
+uutil.cmo: ubase/util.cmi ubase/projectInfo.cmo uutil.cmi 
+uutil.cmx: ubase/util.cmx ubase/projectInfo.cmx uutil.cmi 
 xferhint.cmo: ubase/util.cmi ubase/trace.cmi ubase/prefs.cmi path.cmi os.cmi \
     fspath.cmi xferhint.cmi 
 xferhint.cmx: ubase/util.cmx ubase/trace.cmx ubase/prefs.cmx path.cmx os.cmx \
@@ -256,6 +279,8 @@
     ubase/prefs.cmi 
 ubase/prefs.cmx: ubase/util.cmx ubase/uarg.cmx ubase/safelist.cmx \
     ubase/prefs.cmi 
+ubase/projectInfo.cmo: 
+ubase/projectInfo.cmx: 
 ubase/rx.cmo: ubase/rx.cmi 
 ubase/rx.cmx: ubase/rx.cmi 
 ubase/safelist.cmo: ubase/safelist.cmi 
@@ -270,7 +295,15 @@
 ubase/uprintf.cmx: ubase/uprintf.cmi 
 ubase/util.cmo: ubase/uprintf.cmi ubase/safelist.cmi ubase/util.cmi 
 ubase/util.cmx: ubase/uprintf.cmx ubase/safelist.cmx ubase/util.cmi 
+lwt/lwt.cmi: 
 lwt/lwt_unix.cmi: lwt/lwt.cmi 
 lwt/lwt_util.cmi: lwt/lwt.cmi 
+lwt/pqueue.cmi: 
+ubase/myMap.cmi: 
 ubase/prefs.cmi: ubase/util.cmi 
+ubase/rx.cmi: 
+ubase/safelist.cmi: 
 ubase/trace.cmi: ubase/prefs.cmi 
+ubase/uarg.cmi: 
+ubase/uprintf.cmi: 
+ubase/util.cmi: 

Modified: trunk/src/Makefile.OCaml
===================================================================
--- trunk/src/Makefile.OCaml	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/Makefile.OCaml	2009-05-04 18:48:23 UTC (rev 326)
@@ -179,7 +179,7 @@
           \
           lwt/pqueue.cmo lwt/lwt.cmo lwt/lwt_util.cmo lwt/lwt_unix.cmo \
           \
-          case.cmo pred.cmo uutil.cmo \
+          unicode_tables.cmo unicode.cmo case.cmo pred.cmo uutil.cmo \
           fileutil.cmo name.cmo path.cmo fspath.cmo fingerprint.cmo \
           abort.cmo osx.cmo external.cmo \
           props.cmo fileinfo.cmo os.cmo lock.cmo clroot.cmo common.cmo \

Modified: trunk/src/RECENTNEWS
===================================================================
--- trunk/src/RECENTNEWS	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/RECENTNEWS	2009-05-04 18:48:23 UTC (rev 326)
@@ -1,3 +1,14 @@
+CHANGES FROM VERSION 2.33.-4
+
+* Updated list of bad Windows file names following the MSDN
+  documentation (in particular, files with trailing dots are now
+  rejected when synchronizing with a Windows machine)
+* Experimental Unicode-aware case insensitive mode.  It is activated
+  when the preference "unicode" is set to true and Unison is in
+  case-insensitive mode.
+* Bumped version number to reflect the newly added preference
+
+-------------------------------
 CHANGES FROM VERSION 2.32.7
 
 * Move descriptions of recent changes to documentation.

Modified: trunk/src/case.ml
===================================================================
--- trunk/src/case.ml	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/case.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -34,16 +34,37 @@
      ^ "useful to set the flag manually (e.g. when running Unison on a  "
      ^ "Unix system with a FAT [Windows] volume mounted).")
 
+let unicodeEncoding =
+  Prefs.createBool "unicode" false
+    "!assume Unicode encoding in case insensitive mode"
+    "When set to {\\tt true}, this flag causes Unison to perform \
+     case insensitive file comparisons assuming Unicode encoding"
+
 (* Defining this variable as a preference ensures that it will be propagated
    to the other host during initialization *)
 let someHostIsInsensitive =
   Prefs.createBool "someHostIsInsensitive" false
     "*Pseudo-preference for internal use only" ""
 
-(* Note: this function must be fast *)
-let insensitive () = Prefs.read someHostIsInsensitive
+(* During startup the client determines the case sensitivity of each root.   *)
+(* If any root is case insensitive, all roots must know it; we ensure this   *)
+(* by storing the information in a pref so that it is propagated to the      *)
+(* server with the rest of the prefs.                                        *)
+let init b =
+  Prefs.set someHostIsInsensitive
+    (Prefs.read caseInsensitiveMode = "yes" ||
+     Prefs.read caseInsensitiveMode = "true" ||
+     (Prefs.read caseInsensitiveMode = "default" && b))
 
-let needNormalization s =
+(****)
+
+(* Dots are ignored at the end of filenames under Windows. *)
+
+(* FIX: for the moment, simply disallow files ending with a dot.
+   This is more efficient, and this may well be good enough.
+   We should reconsider this if people start complaining...
+
+let hasTrailingDots s =
   let rec iter s pos len wasDot =
     if pos = len then wasDot else
     let c = s.[pos] in
@@ -67,31 +88,87 @@
   done;
   String.sub s' (!pos' + 1) (len - !pos' - 1)
 
-(* Dots are ignored at the end of filenames under Windows. *)
-let normalize s =
+let rmTrailDots s =
   s
 (*FIX: disabled for know -- requires an archive version change
   if
-    insensitive () &&
-(*FIX: should only be done when one host is running under Windows...
-(should be OK for now as it seems unlikely to have a file ending with
- a dot and the same file with the same name but no dot at the end)
     Prefs.read someHostIsRunningWindows &&
     not (Prefs.read allHostsAreRunningWindows) &&
-*)
-    needNormalization s
+    hasTrailingDots s
   then
     removeTrailingDots s
   else
     s
 *)
+*)
 
-(* During startup the client determines the case sensitivity of each root.   *)
-(* If any root is case insensitive, all roots must know it; we ensure this   *)
-(* by storing the information in a pref so that it is propagated to the      *)
-(* server with the rest of the prefs.                                        *)
-let init b =
-  Prefs.set someHostIsInsensitive
-    (Prefs.read caseInsensitiveMode = "yes" ||
-     Prefs.read caseInsensitiveMode = "true" ||
-     (Prefs.read caseInsensitiveMode = "default" && b))
+(****)
+
+(* Windows file naming conventions are described here:
+   <http://msdn.microsoft.com/en-us/library/aa365247(printer).aspx> *)
+let badWindowsFilenameRx =
+  Rx.case_insensitive
+    (Rx.rx
+       "(.*[\000-\031<>:\"/\\|?*].*)|\
+        ((con|prn|aux|nul|com[1-9]|lpt[1-9])(\\.[^.]*)?)|\
+        (.*[. ])")
+
+let isBadWindowsFilename s =
+  (* FIX: should also check for a max filename length, not sure how much *)
+  Rx.match_string badWindowsFilenameRx s
+let badFilename someHostIsRunningWindows s =
+  (* Don't check unless we are syncing with Windows *)
+  someHostIsRunningWindows && isBadWindowsFilename s
+
+(****)
+
+type mode = Sensitive | Insensitive | UnicodeInsensitive
+
+(*
+Important invariant:
+  if [compare s s' = 0],
+  then [hash s = hash s']
+  and  [Rx.match_string rx (normalizeMatchedString s) =
+        Rx.match_string rx (normalizeMatchedString s')]
+  (when [rx] has been compiled using the [caseInsensitiveMatch] mode)
+*)
+
+let sensitiveOps = object
+  method mode = Sensitive
+  method compare s s' = compare s s'
+  method hash s = Hashtbl.hash s
+  method normalizePattern s = s
+  method caseInsensitiveMatch = false
+  method normalizeMatchedString s = s
+  method badFilename w s = badFilename w s
+end
+
+let insensitiveOps = object
+  method mode = Insensitive
+  method compare s s' = Util.nocase_cmp s s'
+  method hash s = Hashtbl.hash (String.lowercase s)
+  method normalizePattern s = s
+  method caseInsensitiveMatch = true
+  method normalizeMatchedString s = s
+  method badFilename w s = badFilename w s
+end
+
+let unicodeInsensitiveOps = object
+  method mode = UnicodeInsensitive
+  method compare s s' = Unicode.compare s s'
+  method hash s = Hashtbl.hash (Unicode.normalize s)
+  method normalizePattern p = Unicode.normalize p
+  method caseInsensitiveMatch = false
+  method normalizeMatchedString s = Unicode.normalize s
+  method badFilename w s = not (Unicode.check_utf_8 s) || badFilename w s
+end
+
+(* Note: the dispatch must be fast *)
+let ops () =
+  if Prefs.read someHostIsInsensitive then begin
+    if Prefs.read unicodeEncoding then
+      unicodeInsensitiveOps
+    else
+      insensitiveOps
+  end else
+    sensitiveOps

Modified: trunk/src/case.mli
===================================================================
--- trunk/src/case.mli	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/case.mli	2009-05-04 18:48:23 UTC (rev 326)
@@ -1,8 +1,15 @@
 (* Unison file synchronizer: src/case.mli *)
 (* Copyright 1999-2009, Benjamin C. Pierce (see COPYING for details) *)
 
-val insensitive : unit -> bool
+type mode
 
-val normalize : string -> string
+val ops : unit ->
+  < mode : mode;
+    compare : string -> string -> int;
+    hash : string -> int;
+    normalizePattern : string -> string;
+    caseInsensitiveMatch : bool;
+    normalizeMatchedString : string -> string;
+    badFilename : bool -> string -> bool >
 
 val init : bool -> unit

Modified: trunk/src/mkProjectInfo.ml
===================================================================
--- trunk/src/mkProjectInfo.ml	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/mkProjectInfo.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -5,8 +5,8 @@
 
 let projectName = "unison"
 let majorVersion = 2
-let minorVersion = 32
-let pointVersionOrigin = 313 (* Revision that corresponds to point version 0 *)
+let minorVersion = 33
+let pointVersionOrigin = 325 (* Revision that corresponds to point version 0 *)
 
 (* Documentation:
    This is a program to construct a version of the form Major.Minor.Point,
@@ -105,3 +105,4 @@
 
 
 
+

Modified: trunk/src/name.ml
===================================================================
--- trunk/src/name.ml	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/name.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -20,11 +20,7 @@
    INCREMENT "UPDATE.ARCHIVEFORMAT" *)
 type t = string
 
-let compare n1 n2 =
-  if Case.insensitive () then
-    Util.nocase_cmp (Case.normalize n1) (Case.normalize n2)
-  else
-    compare n1 n2
+let compare n1 n2 = (Case.ops())#compare n1 n2
 
 let eq a b = (0 = (compare a b))
 
@@ -41,5 +37,7 @@
   (* We ought to consider further checks, e.g., in Windows, no colons *)
   s
 
-let hash n =
-  Hashtbl.hash (if Case.insensitive () then String.lowercase (Case.normalize n) else n)
+let hash n = (Case.ops())#hash n
+
+let bad someHostIsRunningWindows n =
+  (Case.ops())#badFilename someHostIsRunningWindows n

Modified: trunk/src/name.mli
===================================================================
--- trunk/src/name.mli	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/name.mli	2009-05-04 18:48:23 UTC (rev 326)
@@ -9,3 +9,5 @@
 val compare : t -> t -> int
 val eq : t -> t -> bool
 val hash : t -> int
+
+val bad : bool -> t -> bool

Modified: trunk/src/path.ml
===================================================================
--- trunk/src/path.ml	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/path.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -170,8 +170,7 @@
 
 let toString path = path
 
-let compare p1 p2 =
-  if Case.insensitive () then Util.nocase_cmp p1 p2 else compare p1 p2
+let compare p1 p2 = (Case.ops())#compare p1 p2
 
 let toDebugString path = String.concat " / " (toStringList path)
 
@@ -191,6 +190,7 @@
     assert (not (isEmpty path));
     prefix ^ path
 
+(* No need to perform case normalization on local paths *)
 let hash p = Hashtbl.hash p
 
 (* Pref controlling whether symlinks are followed. *)

Modified: trunk/src/pred.ml
===================================================================
--- trunk/src/pred.ml	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/pred.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -28,7 +28,7 @@
     mutable default: string list;
     mutable last_pref : string list;
     mutable last_def : string list;
-    mutable last_mode : bool;
+    mutable last_mode : Case.mode;
     mutable compiled: Rx.t;
     mutable associated_strings : (Rx.t * string) list;
   }
@@ -61,7 +61,7 @@
   let (p,v) =
     match Util.splitIntoWordsByString clause mapSeparator with
       [p] -> (p,None)
-    | [p;v] -> (p, Some (Util.trimWhitespace v))
+    | [p;v] -> (p, Some ((Case.ops())#normalizePattern (Util.trimWhitespace v)))
     | [] -> raise (Prefs.IllegalValue "Empty pattern")
     | _ -> raise (Prefs.IllegalValue ("Malformed pattern: "
                   ^ "\"" ^ clause ^ "\"\n"
@@ -97,7 +97,7 @@
         string :: oldList)
       (fun l -> l) in
   {pref = pref; name = name;
-   last_pref = []; default = []; last_def = []; last_mode = false;
+   last_pref = []; default = []; last_def = []; last_mode = (Case.ops())#mode;
    compiled = Rx.empty; associated_strings = []} 
 
 let addDefaultPatterns p pats =
@@ -115,14 +115,16 @@
                        None -> None
                      | Some v -> Some (rx,v))
                   compiledList in
-  p.compiled <- if mode then Rx.case_insensitive compiled else compiled;
+  p.compiled <-
+    if (Case.ops())#caseInsensitiveMatch then Rx.case_insensitive compiled
+    else compiled;
   p.associated_strings <- strings;
   p.last_pref <- pref;
   p.last_def <- p.default;
   p.last_mode <- mode
 
 let recompile_if_needed p =
-  let mode = Case.insensitive () in
+  let mode = (Case.ops())#mode in
   if
     p.last_mode <> mode ||
     p.last_pref != Prefs.read p.pref ||
@@ -148,10 +150,11 @@
 
 let test p s =
   recompile_if_needed p;
-  let res = Rx.match_string p.compiled (Case.normalize s) in
+  let res = Rx.match_string p.compiled ((Case.ops())#normalizeMatchedString s) in
   debug (fun() -> Util.msg "%s '%s' = %b\n" p.name s res);
   res
 
 let assoc p s =
   recompile_if_needed p;
+  let s = (Case.ops())#normalizeMatchedString s in
   snd (Safelist.find (fun (rx,v) -> Rx.match_string rx s) p.associated_strings)

Modified: trunk/src/ubase/depend
===================================================================
--- trunk/src/ubase/depend	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/ubase/depend	2009-05-04 18:48:23 UTC (rev 326)
@@ -2,6 +2,8 @@
 myMap.cmx: myMap.cmi 
 prefs.cmo: util.cmi uarg.cmi safelist.cmi prefs.cmi 
 prefs.cmx: util.cmx uarg.cmx safelist.cmx prefs.cmi 
+projectInfo.cmo: 
+projectInfo.cmx: 
 rx.cmo: rx.cmi 
 rx.cmx: rx.cmi 
 safelist.cmo: safelist.cmi 
@@ -14,5 +16,11 @@
 uprintf.cmx: uprintf.cmi 
 util.cmo: uprintf.cmi safelist.cmi util.cmi 
 util.cmx: uprintf.cmx safelist.cmx util.cmi 
+myMap.cmi: 
 prefs.cmi: util.cmi 
+rx.cmi: 
+safelist.cmi: 
 trace.cmi: prefs.cmi 
+uarg.cmi: 
+uprintf.cmi: 
+util.cmi: 

Added: trunk/src/unicode.ml
===================================================================
--- trunk/src/unicode.ml	                        (rev 0)
+++ trunk/src/unicode.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -0,0 +1,859 @@
+(* Unison file synchronizer: src/unicode.ml *)
+(* Copyright 1999-2009, Benjamin C. Pierce 
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*)
+
+open Unicode_tables
+
+exception Invalid
+
+let fail () = raise Invalid
+
+let get s i = Char.code (String.unsafe_get s i)
+let set s i v = String.unsafe_set s i (Char.unsafe_chr v)
+
+(****)
+
+let hangul_sbase = 0xAC00
+let hangul_lbase = 0x1100
+let hangul_vbase = 0x1161
+let hangul_tbase = 0x11A7
+
+let hangul_scount = 11172
+let hangul_lcount = 19
+let hangul_vcount = 21
+let hangul_tcount = 28
+let hangul_ncount = hangul_vcount * hangul_tcount
+
+let set_char_3 s i c =
+  set s i (c lsr 12 + 0xE0);
+  set s (i + 1) ((c lsr 6) land 0x3f + 0x80);
+  set s (i + 2) (c land 0x3f + 0x80)
+
+let rec norm s i l s' j = (* normalize s from i (up to l) into s' at j *)
+  if i < l then begin
+    let c = get s i in
+    if c < 0x80 then begin
+      set s' j (get ascii_lower c);
+      norm s (i + 1) l s' (j + 1)
+    end else if c < 0xE0 then begin
+      (* 80 - 7FF *)
+      if c < 0xc2 || i + 1 >= l then raise Invalid;
+      let c1 = get s (i + 1) in
+      if c1 land 0xc0 <> 0x80 then raise Invalid;
+      let idx = get norm_prim (c - 0xc0) in
+      let idx = idx lsl 6 + c1 - 0x80 in
+      let k = get norm_second_high idx in
+      if k = 0 then begin (* k = 0: the character is copied as is *)
+        set s' j c;
+        set s' (j + 1) c1;
+        norm s (i + 2) l s' (j + 2)
+      end else begin (* replacement: length byte at k, then the bytes *)
+        let k = (k - 2) lsl 8 + get norm_second_low idx in
+        let n = get norm_repl k in
+        String.blit norm_repl (k + 1) s' j n;
+        norm s (i + 2) l s' (j + n)
+      end
+    end else if c < 0xF0 then begin
+      (* 800 - FFFF *)
+      if i + 2 >= l then raise Invalid;
+      let c1 = get s (i + 1) in
+      if c1 land 0xc0 <> 0x80 then raise Invalid;
+      let idx = c lsl 6 + c1 - 0x3880 in
+      if idx < 0x20 then raise Invalid;
+      let c2 = get s (i + 2) in
+      if c2 land 0xc0 <> 0x80 then raise Invalid;
+      let idx = get norm_prim idx in
+      let idx = idx lsl 6 + c2 - 0x80 in
+      let k = get norm_second_high idx in
+      if k = 0 then begin
+        set s' j c;
+        set s' (j + 1) c1;
+        set s' (j + 2) c2;
+        norm s (i + 3) l s' (j + 3)
+      end else if k = 1 then begin (* k = 1: candidate Hangul syllable *)
+        let v = c lsl 12 + c1 lsl 6 + c2 - (0x000E2080 + hangul_sbase) in
+        if v >= hangul_scount then begin
+          set s' j c;
+          set s' (j + 1) c1;
+          set s' (j + 2) c2;
+          norm s (i + 3) l s' (j + 3)
+        end else begin
+          set_char_3 s' j (v / hangul_ncount + hangul_lbase);
+          set_char_3 s' (j + 3)
+            ((v mod hangul_ncount) / hangul_tcount + hangul_vbase);
+          if v mod hangul_tcount = 0 then
+            norm s (i + 3) l s' (j + 6)
+          else begin
+            set_char_3 s' (j + 6) ((v mod hangul_tcount) + hangul_tbase);
+            norm s (i + 3) l s' (j + 9)
+          end
+        end
+      end else begin
+        let k = (k - 2) lsl 8 + get norm_second_low idx in
+        let n = get norm_repl k in
+        String.blit norm_repl (k + 1) s' j n;
+        norm s (i + 3) l s' (j + n)
+      end
+    end else begin
+      (* 10000 - 10FFFF; continuation bytes are validated one by one *)
+      if i + 3 >= l then raise Invalid;
+      let c1 = get s (i + 1) and c2 = get s (i + 2) and c3 = get s (i + 3) in
+      if c1 land 0xc0 <> 0x80 then raise Invalid;
+      if c2 land 0xc0 <> 0x80 then raise Invalid;
+      if c3 land 0xc0 <> 0x80 then raise Invalid;
+      let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
+      if v < 0x10000 || v > 0x10ffff then raise Invalid;
+      set s' j c;
+      set s' (j + 1) c1;
+      set s' (j + 2) c2;
+      set s' (j + 3) c3;
+      norm s (i + 4) l s' (j + 4)
+    end
+  end else
+    String.sub s' 0 j (* input consumed: the j bytes written are the result *)
+
+let normalize s =
+  let l = String.length s in
+  let s' = String.create (3 * l) in
+  try norm s 0 l s' 0 with Invalid -> s
+
+(****)
+
+let rec compare_rec s s' i l =
+  if i = l then begin
+    if l < String.length s then 1 else
+    if l < String.length s' then -1 else
+    0
+  end else begin
+    let c = get s i in
+    let c' = get s' i in
+    if c < 0x80 && c' < 0x80 then begin
+      let v = compare (get ascii_lower c) (get ascii_lower c') in
+      if v <> 0 then v else compare_rec s s' (i + 1) l
+    end else
+      compare (normalize s) (normalize s')
+  end
+
+let compare s s' =
+  compare_rec s s' 0 (min (String.length s) (String.length s'))
+
+(****)
+
+let rec decode_char s i l = (* decode the UTF-8 character of s at index i *)
+  if i = l then fail () else (* l bounds the readable indices; get is unchecked *)
+  let c = get s i in
+  if c < 0x80 then
+    cont s (i + 1) l c
+  else if c < 0xE0 then begin
+    (* 80 - 7FF *)
+    if c < 0xc2 || i + 1 >= l then fail () else
+    let c1 = get s (i + 1) in
+    if c1 land 0xc0 <> 0x80 then fail () else
+    let v = c lsl 6 + c1 - 0x3080 in
+    cont s (i + 2) l v
+  end else if c < 0xF0 then begin
+    (* 800 - FFFF *)
+    if i + 2 >= l then fail () else
+    let c1 = get s (i + 1) in
+    let c2 = get s (i + 2) in
+    if c1 land 0xc0 <> 0x80 || c2 land 0xc0 <> 0x80 then fail () else
+    let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
+    if v < 0x800 then fail () else
+    cont s (i + 3) l v
+  end else begin
+    (* 10000 - 10FFFF; each continuation byte is checked on its own *)
+    if i + 3 >= l then fail () else
+    let c1 = get s (i + 1) and c2 = get s (i + 2) and c3 = get s (i + 3) in
+    if c1 land 0xc0 <> 0x80 then fail () else
+    if c2 land 0xc0 <> 0x80 then fail () else
+    if c3 land 0xc0 <> 0x80 then fail () else
+    let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
+    if v < 0x10000 || v > 0x10ffff then fail () else
+    cont s (i + 4) l v
+  end
+
+and cont s i l v = (v, i) (* result: (code point, index just after it) *)
+
+let encode_char s i l c = (* store c as UTF-8 in s at index i; gives next index *)
+  if c < 0x80 then begin
+    if i >= l then fail () else begin (* fail () raises Invalid: no room left *)
+      set s i c;
+      i + 1
+    end
+  end else if c < 0x800 then begin
+    if i + 1 >= l then fail () else begin
+      set s i (c lsr 6 + 0xC0);
+      set s (i + 1) (c land 0x3f + 0x80);
+      i + 2
+    end
+  end else if c < 0x10000 then begin
+    if i + 2 >= l then fail () else begin (* set is unchecked; last write is i + 2 *)
+      set s i (c lsr 12 + 0xE0);
+      set s (i + 1) ((c lsr 6) land 0x3f + 0x80);
+      set s (i + 2) (c land 0x3f + 0x80);
+      i + 3
+    end
+  end else begin
+    if i + 3 >= l then fail () else begin (* set is unchecked; last write is i + 3 *)
+      set s i (c lsr 18 + 0xF0);
+      set s (i + 1) ((c lsr 12) land 0x3f + 0x80);
+      set s (i + 2) ((c lsr 6) land 0x3f + 0x80);
+      set s (i + 3) (c land 0x3f + 0x80);
+      i + 4
+    end
+  end
+
+let rec prev_char s i =
+  let i = i - 1 in
+  if i < 0 then fail () else
+  if (get s i) land 0xc0 <> 0x80 then i else prev_char s i
+
+(****)
+
+let uniCharPrecompSourceTable = [|
+	0x00000300; 0x00540000; 0x00000301; 0x00750054;
+	0x00000302; 0x002000C9; 0x00000303; 0x001C00E9;
+	0x00000304; 0x002C0105; 0x00000306; 0x00200131;
+	0x00000307; 0x002E0151; 0x00000308; 0x0036017F;
+	0x00000309; 0x001801B5; 0x0000030A; 0x000601CD;
+	0x0000030B; 0x000601D3; 0x0000030C; 0x002501D9;
+	0x0000030F; 0x000E01FE; 0x00000311; 0x000C020C;
+	0x00000313; 0x000E0218; 0x00000314; 0x00100226;
+	0x0000031B; 0x00040236; 0x00000323; 0x002A023A;
+	0x00000324; 0x00020264; 0x00000325; 0x00020266;
+	0x00000326; 0x00040268; 0x00000327; 0x0016026C;
+	0x00000328; 0x000A0282; 0x0000032D; 0x000C028C;
+	0x0000032E; 0x00020298; 0x00000330; 0x0006029A;
+	0x00000331; 0x001102A0; 0x00000338; 0x002C02B1;
+	0x00000342; 0x001D02DD; 0x00000345; 0x003F02FA;
+	0x00000653; 0x00010339; 0x00000654; 0x0006033A;
+	0x00000655; 0x00010340; 0x0000093C; 0x00030341;
+	0x000009BE; 0x00010344; 0x000009D7; 0x00010345;
+	0x00000B3E; 0x00010346; 0x00000B56; 0x00010347;
+	0x00000B57; 0x00010348; 0x00000BBE; 0x00020349;
+	0x00000BD7; 0x0002034B; 0x00000C56; 0x0001034D;
+	0x00000CC2; 0x0001034E; 0x00000CD5; 0x0003034F;
+	0x00000CD6; 0x00010352; 0x00000D3E; 0x00020353;
+	0x00000D57; 0x00010355; 0x00000DCA; 0x00020356;
+	0x00000DCF; 0x00010358; 0x00000DDF; 0x00010359;
+	0x0000102E; 0x0001035A; 0x00003099; 0x0030035B;
+	0x0000309A; 0x000A038B
+|]
+
+let uniCharBMPPrecompDestinationTable = [|
+	0x0041; 0x00C0; 0x0045; 0x00C8; 0x0049; 0x00CC; 0x004E; 0x01F8;
+	0x004F; 0x00D2; 0x0055; 0x00D9; 0x0057; 0x1E80; 0x0059; 0x1EF2;
+	0x0061; 0x00E0; 0x0065; 0x00E8; 0x0069; 0x00EC; 0x006E; 0x01F9;
+	0x006F; 0x00F2; 0x0075; 0x00F9; 0x0077; 0x1E81; 0x0079; 0x1EF3;
+	0x00A8; 0x1FED; 0x00C2; 0x1EA6; 0x00CA; 0x1EC0; 0x00D4; 0x1ED2;
+	0x00DC; 0x01DB; 0x00E2; 0x1EA7; 0x00EA; 0x1EC1; 0x00F4; 0x1ED3;
+	0x00FC; 0x01DC; 0x0102; 0x1EB0; 0x0103; 0x1EB1; 0x0112; 0x1E14;
+	0x0113; 0x1E15; 0x014C; 0x1E50; 0x014D; 0x1E51; 0x01A0; 0x1EDC;
+	0x01A1; 0x1EDD; 0x01AF; 0x1EEA; 0x01B0; 0x1EEB; 0x0391; 0x1FBA;
+	0x0395; 0x1FC8; 0x0397; 0x1FCA; 0x0399; 0x1FDA; 0x039F; 0x1FF8;
+	0x03A5; 0x1FEA; 0x03A9; 0x1FFA; 0x03B1; 0x1F70; 0x03B5; 0x1F72;
+	0x03B7; 0x1F74; 0x03B9; 0x1F76; 0x03BF; 0x1F78; 0x03C5; 0x1F7A;
+	0x03C9; 0x1F7C; 0x03CA; 0x1FD2; 0x03CB; 0x1FE2; 0x0415; 0x0400;
+	0x0418; 0x040D; 0x0435; 0x0450; 0x0438; 0x045D; 0x1F00; 0x1F02;
+	0x1F01; 0x1F03; 0x1F08; 0x1F0A; 0x1F09; 0x1F0B; 0x1F10; 0x1F12;
+	0x1F11; 0x1F13; 0x1F18; 0x1F1A; 0x1F19; 0x1F1B; 0x1F20; 0x1F22;
+	0x1F21; 0x1F23; 0x1F28; 0x1F2A; 0x1F29; 0x1F2B; 0x1F30; 0x1F32;
+	0x1F31; 0x1F33; 0x1F38; 0x1F3A; 0x1F39; 0x1F3B; 0x1F40; 0x1F42;
+	0x1F41; 0x1F43; 0x1F48; 0x1F4A; 0x1F49; 0x1F4B; 0x1F50; 0x1F52;
+	0x1F51; 0x1F53; 0x1F59; 0x1F5B; 0x1F60; 0x1F62; 0x1F61; 0x1F63;
+	0x1F68; 0x1F6A; 0x1F69; 0x1F6B; 0x1FBF; 0x1FCD; 0x1FFE; 0x1FDD;
+	0x0041; 0x00C1; 0x0043; 0x0106; 0x0045; 0x00C9; 0x0047; 0x01F4;
+	0x0049; 0x00CD; 0x004B; 0x1E30; 0x004C; 0x0139; 0x004D; 0x1E3E;
+	0x004E; 0x0143; 0x004F; 0x00D3; 0x0050; 0x1E54; 0x0052; 0x0154;
+	0x0053; 0x015A; 0x0055; 0x00DA; 0x0057; 0x1E82; 0x0059; 0x00DD;
+	0x005A; 0x0179; 0x0061; 0x00E1; 0x0063; 0x0107; 0x0065; 0x00E9;
+	0x0067; 0x01F5; 0x0069; 0x00ED; 0x006B; 0x1E31; 0x006C; 0x013A;
+	0x006D; 0x1E3F; 0x006E; 0x0144; 0x006F; 0x00F3; 0x0070; 0x1E55;
+	0x0072; 0x0155; 0x0073; 0x015B; 0x0075; 0x00FA; 0x0077; 0x1E83;
+	0x0079; 0x00FD; 0x007A; 0x017A; 0x00A8; 0x0385; 0x00C2; 0x1EA4;
+	0x00C5; 0x01FA; 0x00C6; 0x01FC; 0x00C7; 0x1E08; 0x00CA; 0x1EBE;
+	0x00CF; 0x1E2E; 0x00D4; 0x1ED0; 0x00D5; 0x1E4C; 0x00D8; 0x01FE;
+	0x00DC; 0x01D7; 0x00E2; 0x1EA5; 0x00E5; 0x01FB; 0x00E6; 0x01FD;
+	0x00E7; 0x1E09; 0x00EA; 0x1EBF; 0x00EF; 0x1E2F; 0x00F4; 0x1ED1;
+	0x00F5; 0x1E4D; 0x00F8; 0x01FF; 0x00FC; 0x01D8; 0x0102; 0x1EAE;
+	0x0103; 0x1EAF; 0x0112; 0x1E16; 0x0113; 0x1E17; 0x014C; 0x1E52;
+	0x014D; 0x1E53; 0x0168; 0x1E78; 0x0169; 0x1E79; 0x01A0; 0x1EDA;
+	0x01A1; 0x1EDB; 0x01AF; 0x1EE8; 0x01B0; 0x1EE9; 0x0391; 0x0386;
+	0x0395; 0x0388; 0x0397; 0x0389; 0x0399; 0x038A; 0x039F; 0x038C;
+	0x03A5; 0x038E; 0x03A9; 0x038F; 0x03B1; 0x03AC; 0x03B5; 0x03AD;
+	0x03B7; 0x03AE; 0x03B9; 0x03AF; 0x03BF; 0x03CC; 0x03C5; 0x03CD;
+	0x03C9; 0x03CE; 0x03CA; 0x0390; 0x03CB; 0x03B0; 0x03D2; 0x03D3;
+	0x0413; 0x0403; 0x041A; 0x040C; 0x0433; 0x0453; 0x043A; 0x045C;
+	0x1F00; 0x1F04; 0x1F01; 0x1F05; 0x1F08; 0x1F0C; 0x1F09; 0x1F0D;
+	0x1F10; 0x1F14; 0x1F11; 0x1F15; 0x1F18; 0x1F1C; 0x1F19; 0x1F1D;
+	0x1F20; 0x1F24; 0x1F21; 0x1F25; 0x1F28; 0x1F2C; 0x1F29; 0x1F2D;
+	0x1F30; 0x1F34; 0x1F31; 0x1F35; 0x1F38; 0x1F3C; 0x1F39; 0x1F3D;
+	0x1F40; 0x1F44; 0x1F41; 0x1F45; 0x1F48; 0x1F4C; 0x1F49; 0x1F4D;
+	0x1F50; 0x1F54; 0x1F51; 0x1F55; 0x1F59; 0x1F5D; 0x1F60; 0x1F64;
+	0x1F61; 0x1F65; 0x1F68; 0x1F6C; 0x1F69; 0x1F6D; 0x1FBF; 0x1FCE;
+	0x1FFE; 0x1FDE; 0x0041; 0x00C2; 0x0043; 0x0108; 0x0045; 0x00CA;
+	0x0047; 0x011C; 0x0048; 0x0124; 0x0049; 0x00CE; 0x004A; 0x0134;
+	0x004F; 0x00D4; 0x0053; 0x015C; 0x0055; 0x00DB; 0x0057; 0x0174;
+	0x0059; 0x0176; 0x005A; 0x1E90; 0x0061; 0x00E2; 0x0063; 0x0109;
+	0x0065; 0x00EA; 0x0067; 0x011D; 0x0068; 0x0125; 0x0069; 0x00EE;
+	0x006A; 0x0135; 0x006F; 0x00F4; 0x0073; 0x015D; 0x0075; 0x00FB;
+	0x0077; 0x0175; 0x0079; 0x0177; 0x007A; 0x1E91; 0x1EA0; 0x1EAC;
+	0x1EA1; 0x1EAD; 0x1EB8; 0x1EC6; 0x1EB9; 0x1EC7; 0x1ECC; 0x1ED8;
+	0x1ECD; 0x1ED9; 0x0041; 0x00C3; 0x0045; 0x1EBC; 0x0049; 0x0128;
+	0x004E; 0x00D1; 0x004F; 0x00D5; 0x0055; 0x0168; 0x0056; 0x1E7C;
+	0x0059; 0x1EF8; 0x0061; 0x00E3; 0x0065; 0x1EBD; 0x0069; 0x0129;
+	0x006E; 0x00F1; 0x006F; 0x00F5; 0x0075; 0x0169; 0x0076; 0x1E7D;
+	0x0079; 0x1EF9; 0x00C2; 0x1EAA; 0x00CA; 0x1EC4; 0x00D4; 0x1ED6;
+	0x00E2; 0x1EAB; 0x00EA; 0x1EC5; 0x00F4; 0x1ED7; 0x0102; 0x1EB4;
+	0x0103; 0x1EB5; 0x01A0; 0x1EE0; 0x01A1; 0x1EE1; 0x01AF; 0x1EEE;
+	0x01B0; 0x1EEF; 0x0041; 0x0100; 0x0045; 0x0112; 0x0047; 0x1E20;
+	0x0049; 0x012A; 0x004F; 0x014C; 0x0055; 0x016A; 0x0059; 0x0232;
+	0x0061; 0x0101; 0x0065; 0x0113; 0x0067; 0x1E21; 0x0069; 0x012B;
+	0x006F; 0x014D; 0x0075; 0x016B; 0x0079; 0x0233; 0x00C4; 0x01DE;
+	0x00C6; 0x01E2; 0x00D5; 0x022C; 0x00D6; 0x022A; 0x00DC; 0x01D5;
+	0x00E4; 0x01DF; 0x00E6; 0x01E3; 0x00F5; 0x022D; 0x00F6; 0x022B;
+	0x00FC; 0x01D6; 0x01EA; 0x01EC; 0x01EB; 0x01ED; 0x0226; 0x01E0;
+	0x0227; 0x01E1; 0x022E; 0x0230; 0x022F; 0x0231; 0x0391; 0x1FB9;
+	0x0399; 0x1FD9; 0x03A5; 0x1FE9; 0x03B1; 0x1FB1; 0x03B9; 0x1FD1;
+	0x03C5; 0x1FE1; 0x0418; 0x04E2; 0x0423; 0x04EE; 0x0438; 0x04E3;
+	0x0443; 0x04EF; 0x1E36; 0x1E38; 0x1E37; 0x1E39; 0x1E5A; 0x1E5C;
+	0x1E5B; 0x1E5D; 0x0041; 0x0102; 0x0045; 0x0114; 0x0047; 0x011E;
+	0x0049; 0x012C; 0x004F; 0x014E; 0x0055; 0x016C; 0x0061; 0x0103;
+	0x0065; 0x0115; 0x0067; 0x011F; 0x0069; 0x012D; 0x006F; 0x014F;
+	0x0075; 0x016D; 0x0228; 0x1E1C; 0x0229; 0x1E1D; 0x0391; 0x1FB8;
+	0x0399; 0x1FD8; 0x03A5; 0x1FE8; 0x03B1; 0x1FB0; 0x03B9; 0x1FD0;
+	0x03C5; 0x1FE0; 0x0410; 0x04D0; 0x0415; 0x04D6; 0x0416; 0x04C1;
+	0x0418; 0x0419; 0x0423; 0x040E; 0x0430; 0x04D1; 0x0435; 0x04D7;
+	0x0436; 0x04C2; 0x0438; 0x0439; 0x0443; 0x045E; 0x1EA0; 0x1EB6;
+	0x1EA1; 0x1EB7; 0x0041; 0x0226; 0x0042; 0x1E02; 0x0043; 0x010A;
+	0x0044; 0x1E0A; 0x0045; 0x0116; 0x0046; 0x1E1E; 0x0047; 0x0120;
+	0x0048; 0x1E22; 0x0049; 0x0130; 0x004D; 0x1E40; 0x004E; 0x1E44;
+	0x004F; 0x022E; 0x0050; 0x1E56; 0x0052; 0x1E58; 0x0053; 0x1E60;
+	0x0054; 0x1E6A; 0x0057; 0x1E86; 0x0058; 0x1E8A; 0x0059; 0x1E8E;
+	0x005A; 0x017B; 0x0061; 0x0227; 0x0062; 0x1E03; 0x0063; 0x010B;
+	0x0064; 0x1E0B; 0x0065; 0x0117; 0x0066; 0x1E1F; 0x0067; 0x0121;
+	0x0068; 0x1E23; 0x006D; 0x1E41; 0x006E; 0x1E45; 0x006F; 0x022F;
+	0x0070; 0x1E57; 0x0072; 0x1E59; 0x0073; 0x1E61; 0x0074; 0x1E6B;
+	0x0077; 0x1E87; 0x0078; 0x1E8B; 0x0079; 0x1E8F; 0x007A; 0x017C;
+	0x015A; 0x1E64; 0x015B; 0x1E65; 0x0160; 0x1E66; 0x0161; 0x1E67;
+	0x017F; 0x1E9B; 0x1E62; 0x1E68; 0x1E63; 0x1E69; 0x0041; 0x00C4;
+	0x0045; 0x00CB; 0x0048; 0x1E26; 0x0049; 0x00CF; 0x004F; 0x00D6;
+	0x0055; 0x00DC; 0x0057; 0x1E84; 0x0058; 0x1E8C; 0x0059; 0x0178;
+	0x0061; 0x00E4; 0x0065; 0x00EB; 0x0068; 0x1E27; 0x0069; 0x00EF;
+	0x006F; 0x00F6; 0x0074; 0x1E97; 0x0075; 0x00FC; 0x0077; 0x1E85;
+	0x0078; 0x1E8D; 0x0079; 0x00FF; 0x00D5; 0x1E4E; 0x00F5; 0x1E4F;
+	0x016A; 0x1E7A; 0x016B; 0x1E7B; 0x0399; 0x03AA; 0x03A5; 0x03AB;
+	0x03B9; 0x03CA; 0x03C5; 0x03CB; 0x03D2; 0x03D4; 0x0406; 0x0407;
+	0x0410; 0x04D2; 0x0415; 0x0401; 0x0416; 0x04DC; 0x0417; 0x04DE;
+	0x0418; 0x04E4; 0x041E; 0x04E6; 0x0423; 0x04F0; 0x0427; 0x04F4;
+	0x042B; 0x04F8; 0x042D; 0x04EC; 0x0430; 0x04D3; 0x0435; 0x0451;
+	0x0436; 0x04DD; 0x0437; 0x04DF; 0x0438; 0x04E5; 0x043E; 0x04E7;
+	0x0443; 0x04F1; 0x0447; 0x04F5; 0x044B; 0x04F9; 0x044D; 0x04ED;
+	0x0456; 0x0457; 0x04D8; 0x04DA; 0x04D9; 0x04DB; 0x04E8; 0x04EA;
+	0x04E9; 0x04EB; 0x0041; 0x1EA2; 0x0045; 0x1EBA; 0x0049; 0x1EC8;
+	0x004F; 0x1ECE; 0x0055; 0x1EE6; 0x0059; 0x1EF6; 0x0061; 0x1EA3;
+	0x0065; 0x1EBB; 0x0069; 0x1EC9; 0x006F; 0x1ECF; 0x0075; 0x1EE7;
+	0x0079; 0x1EF7; 0x00C2; 0x1EA8; 0x00CA; 0x1EC2; 0x00D4; 0x1ED4;
+	0x00E2; 0x1EA9; 0x00EA; 0x1EC3; 0x00F4; 0x1ED5; 0x0102; 0x1EB2;
+	0x0103; 0x1EB3; 0x01A0; 0x1EDE; 0x01A1; 0x1EDF; 0x01AF; 0x1EEC;
+	0x01B0; 0x1EED; 0x0041; 0x00C5; 0x0055; 0x016E; 0x0061; 0x00E5;
+	0x0075; 0x016F; 0x0077; 0x1E98; 0x0079; 0x1E99; 0x004F; 0x0150;
+	0x0055; 0x0170; 0x006F; 0x0151; 0x0075; 0x0171; 0x0423; 0x04F2;
+	0x0443; 0x04F3; 0x0041; 0x01CD; 0x0043; 0x010C; 0x0044; 0x010E;
+	0x0045; 0x011A; 0x0047; 0x01E6; 0x0048; 0x021E; 0x0049; 0x01CF;
+	0x004B; 0x01E8; 0x004C; 0x013D; 0x004E; 0x0147; 0x004F; 0x01D1;
+	0x0052; 0x0158; 0x0053; 0x0160; 0x0054; 0x0164; 0x0055; 0x01D3;
+	0x005A; 0x017D; 0x0061; 0x01CE; 0x0063; 0x010D; 0x0064; 0x010F;
+	0x0065; 0x011B; 0x0067; 0x01E7; 0x0068; 0x021F; 0x0069; 0x01D0;
+	0x006A; 0x01F0; 0x006B; 0x01E9; 0x006C; 0x013E; 0x006E; 0x0148;
+	0x006F; 0x01D2; 0x0072; 0x0159; 0x0073; 0x0161; 0x0074; 0x0165;
+	0x0075; 0x01D4; 0x007A; 0x017E; 0x00DC; 0x01D9; 0x00FC; 0x01DA;
+	0x01B7; 0x01EE; 0x0292; 0x01EF; 0x0041; 0x0200; 0x0045; 0x0204;
+	0x0049; 0x0208; 0x004F; 0x020C; 0x0052; 0x0210; 0x0055; 0x0214;
+	0x0061; 0x0201; 0x0065; 0x0205; 0x0069; 0x0209; 0x006F; 0x020D;
+	0x0072; 0x0211; 0x0075; 0x0215; 0x0474; 0x0476; 0x0475; 0x0477;
+	0x0041; 0x0202; 0x0045; 0x0206; 0x0049; 0x020A; 0x004F; 0x020E;
+	0x0052; 0x0212; 0x0055; 0x0216; 0x0061; 0x0203; 0x0065; 0x0207;
+	0x0069; 0x020B; 0x006F; 0x020F; 0x0072; 0x0213; 0x0075; 0x0217;
+	0x0391; 0x1F08; 0x0395; 0x1F18; 0x0397; 0x1F28; 0x0399; 0x1F38;
+	0x039F; 0x1F48; 0x03A9; 0x1F68; 0x03B1; 0x1F00; 0x03B5; 0x1F10;
+	0x03B7; 0x1F20; 0x03B9; 0x1F30; 0x03BF; 0x1F40; 0x03C1; 0x1FE4;
+	0x03C5; 0x1F50; 0x03C9; 0x1F60; 0x0391; 0x1F09; 0x0395; 0x1F19;
+	0x0397; 0x1F29; 0x0399; 0x1F39; 0x039F; 0x1F49; 0x03A1; 0x1FEC;
+	0x03A5; 0x1F59; 0x03A9; 0x1F69; 0x03B1; 0x1F01; 0x03B5; 0x1F11;
+	0x03B7; 0x1F21; 0x03B9; 0x1F31; 0x03BF; 0x1F41; 0x03C1; 0x1FE5;
+	0x03C5; 0x1F51; 0x03C9; 0x1F61; 0x004F; 0x01A0; 0x0055; 0x01AF;
+	0x006F; 0x01A1; 0x0075; 0x01B0; 0x0041; 0x1EA0; 0x0042; 0x1E04;
+	0x0044; 0x1E0C; 0x0045; 0x1EB8; 0x0048; 0x1E24; 0x0049; 0x1ECA;
+	0x004B; 0x1E32; 0x004C; 0x1E36; 0x004D; 0x1E42; 0x004E; 0x1E46;
+	0x004F; 0x1ECC; 0x0052; 0x1E5A; 0x0053; 0x1E62; 0x0054; 0x1E6C;
+	0x0055; 0x1EE4; 0x0056; 0x1E7E; 0x0057; 0x1E88; 0x0059; 0x1EF4;
+	0x005A; 0x1E92; 0x0061; 0x1EA1; 0x0062; 0x1E05; 0x0064; 0x1E0D;
+	0x0065; 0x1EB9; 0x0068; 0x1E25; 0x0069; 0x1ECB; 0x006B; 0x1E33;
+	0x006C; 0x1E37; 0x006D; 0x1E43; 0x006E; 0x1E47; 0x006F; 0x1ECD;
+	0x0072; 0x1E5B; 0x0073; 0x1E63; 0x0074; 0x1E6D; 0x0075; 0x1EE5;
+	0x0076; 0x1E7F; 0x0077; 0x1E89; 0x0079; 0x1EF5; 0x007A; 0x1E93;
+	0x01A0; 0x1EE2; 0x01A1; 0x1EE3; 0x01AF; 0x1EF0; 0x01B0; 0x1EF1;
+	0x0055; 0x1E72; 0x0075; 0x1E73; 0x0041; 0x1E00; 0x0061; 0x1E01;
+	0x0053; 0x0218; 0x0054; 0x021A; 0x0073; 0x0219; 0x0074; 0x021B;
+	0x0043; 0x00C7; 0x0044; 0x1E10; 0x0045; 0x0228; 0x0047; 0x0122;
+	0x0048; 0x1E28; 0x004B; 0x0136; 0x004C; 0x013B; 0x004E; 0x0145;
+	0x0052; 0x0156; 0x0053; 0x015E; 0x0054; 0x0162; 0x0063; 0x00E7;
+	0x0064; 0x1E11; 0x0065; 0x0229; 0x0067; 0x0123; 0x0068; 0x1E29;
+	0x006B; 0x0137; 0x006C; 0x013C; 0x006E; 0x0146; 0x0072; 0x0157;
+	0x0073; 0x015F; 0x0074; 0x0163; 0x0041; 0x0104; 0x0045; 0x0118;
+	0x0049; 0x012E; 0x004F; 0x01EA; 0x0055; 0x0172; 0x0061; 0x0105;
+	0x0065; 0x0119; 0x0069; 0x012F; 0x006F; 0x01EB; 0x0075; 0x0173;
+	0x0044; 0x1E12; 0x0045; 0x1E18; 0x004C; 0x1E3C; 0x004E; 0x1E4A;
+	0x0054; 0x1E70; 0x0055; 0x1E76; 0x0064; 0x1E13; 0x0065; 0x1E19;
+	0x006C; 0x1E3D; 0x006E; 0x1E4B; 0x0074; 0x1E71; 0x0075; 0x1E77;
+	0x0048; 0x1E2A; 0x0068; 0x1E2B; 0x0045; 0x1E1A; 0x0049; 0x1E2C;
+	0x0055; 0x1E74; 0x0065; 0x1E1B; 0x0069; 0x1E2D; 0x0075; 0x1E75;
+	0x0042; 0x1E06; 0x0044; 0x1E0E; 0x004B; 0x1E34; 0x004C; 0x1E3A;
+	0x004E; 0x1E48; 0x0052; 0x1E5E; 0x0054; 0x1E6E; 0x005A; 0x1E94;
+	0x0062; 0x1E07; 0x0064; 0x1E0F; 0x0068; 0x1E96; 0x006B; 0x1E35;
+	0x006C; 0x1E3B; 0x006E; 0x1E49; 0x0072; 0x1E5F; 0x0074; 0x1E6F;
+	0x007A; 0x1E95; 0x003C; 0x226E; 0x003D; 0x2260; 0x003E; 0x226F;
+	0x2190; 0x219A; 0x2192; 0x219B; 0x2194; 0x21AE; 0x21D0; 0x21CD;
+	0x21D2; 0x21CF; 0x21D4; 0x21CE; 0x2203; 0x2204; 0x2208; 0x2209;
+	0x220B; 0x220C; 0x2223; 0x2224; 0x2225; 0x2226; 0x223C; 0x2241;
+	0x2243; 0x2244; 0x2245; 0x2247; 0x2248; 0x2249; 0x224D; 0x226D;
+	0x2261; 0x2262; 0x2264; 0x2270; 0x2265; 0x2271; 0x2272; 0x2274;
+	0x2273; 0x2275; 0x2276; 0x2278; 0x2277; 0x2279; 0x227A; 0x2280;
+	0x227B; 0x2281; 0x227C; 0x22E0; 0x227D; 0x22E1; 0x2282; 0x2284;
+	0x2283; 0x2285; 0x2286; 0x2288; 0x2287; 0x2289; 0x2291; 0x22E2;
+	0x2292; 0x22E3; 0x22A2; 0x22AC; 0x22A8; 0x22AD; 0x22A9; 0x22AE;
+	0x22AB; 0x22AF; 0x22B2; 0x22EA; 0x22B3; 0x22EB; 0x22B4; 0x22EC;
+	0x22B5; 0x22ED; 0x00A8; 0x1FC1; 0x03B1; 0x1FB6; 0x03B7; 0x1FC6;
+	0x03B9; 0x1FD6; 0x03C5; 0x1FE6; 0x03C9; 0x1FF6; 0x03CA; 0x1FD7;
+	0x03CB; 0x1FE7; 0x1F00; 0x1F06; 0x1F01; 0x1F07; 0x1F08; 0x1F0E;
+	0x1F09; 0x1F0F; 0x1F20; 0x1F26; 0x1F21; 0x1F27; 0x1F28; 0x1F2E;
+	0x1F29; 0x1F2F; 0x1F30; 0x1F36; 0x1F31; 0x1F37; 0x1F38; 0x1F3E;
+	0x1F39; 0x1F3F; 0x1F50; 0x1F56; 0x1F51; 0x1F57; 0x1F59; 0x1F5F;
+	0x1F60; 0x1F66; 0x1F61; 0x1F67; 0x1F68; 0x1F6E; 0x1F69; 0x1F6F;
+	0x1FBF; 0x1FCF; 0x1FFE; 0x1FDF; 0x0391; 0x1FBC; 0x0397; 0x1FCC;
+	0x03A9; 0x1FFC; 0x03AC; 0x1FB4; 0x03AE; 0x1FC4; 0x03B1; 0x1FB3;
+	0x03B7; 0x1FC3; 0x03C9; 0x1FF3; 0x03CE; 0x1FF4; 0x1F00; 0x1F80;
+	0x1F01; 0x1F81; 0x1F02; 0x1F82; 0x1F03; 0x1F83; 0x1F04; 0x1F84;
+	0x1F05; 0x1F85; 0x1F06; 0x1F86; 0x1F07; 0x1F87; 0x1F08; 0x1F88;
+	0x1F09; 0x1F89; 0x1F0A; 0x1F8A; 0x1F0B; 0x1F8B; 0x1F0C; 0x1F8C;
+	0x1F0D; 0x1F8D; 0x1F0E; 0x1F8E; 0x1F0F; 0x1F8F; 0x1F20; 0x1F90;
+	0x1F21; 0x1F91; 0x1F22; 0x1F92; 0x1F23; 0x1F93; 0x1F24; 0x1F94;
+	0x1F25; 0x1F95; 0x1F26; 0x1F96; 0x1F27; 0x1F97; 0x1F28; 0x1F98;
+	0x1F29; 0x1F99; 0x1F2A; 0x1F9A; 0x1F2B; 0x1F9B; 0x1F2C; 0x1F9C;
+	0x1F2D; 0x1F9D; 0x1F2E; 0x1F9E; 0x1F2F; 0x1F9F; 0x1F60; 0x1FA0;
+	0x1F61; 0x1FA1; 0x1F62; 0x1FA2; 0x1F63; 0x1FA3; 0x1F64; 0x1FA4;
+	0x1F65; 0x1FA5; 0x1F66; 0x1FA6; 0x1F67; 0x1FA7; 0x1F68; 0x1FA8;
+	0x1F69; 0x1FA9; 0x1F6A; 0x1FAA; 0x1F6B; 0x1FAB; 0x1F6C; 0x1FAC;
+	0x1F6D; 0x1FAD; 0x1F6E; 0x1FAE; 0x1F6F; 0x1FAF; 0x1F70; 0x1FB2;
+	0x1F74; 0x1FC2; 0x1F7C; 0x1FF2; 0x1FB6; 0x1FB7; 0x1FC6; 0x1FC7;
+	0x1FF6; 0x1FF7; 0x0627; 0x0622; 0x0627; 0x0623; 0x0648; 0x0624;
+	0x064A; 0x0626; 0x06C1; 0x06C2; 0x06D2; 0x06D3; 0x06D5; 0x06C0;
+	0x0627; 0x0625; 0x0928; 0x0929; 0x0930; 0x0931; 0x0933; 0x0934;
+	0x09C7; 0x09CB; 0x09C7; 0x09CC; 0x0B47; 0x0B4B; 0x0B47; 0x0B48;
+	0x0B47; 0x0B4C; 0x0BC6; 0x0BCA; 0x0BC7; 0x0BCB; 0x0B92; 0x0B94;
+	0x0BC6; 0x0BCC; 0x0C46; 0x0C48; 0x0CC6; 0x0CCA; 0x0CBF; 0x0CC0;
+	0x0CC6; 0x0CC7; 0x0CCA; 0x0CCB; 0x0CC6; 0x0CC8; 0x0D46; 0x0D4A;
+	0x0D47; 0x0D4B; 0x0D46; 0x0D4C; 0x0DD9; 0x0DDA; 0x0DDC; 0x0DDD;
+	0x0DD9; 0x0DDC; 0x0DD9; 0x0DDE; 0x1025; 0x1026; 0x3046; 0x3094;
+	0x304B; 0x304C; 0x304D; 0x304E; 0x304F; 0x3050; 0x3051; 0x3052;
+	0x3053; 0x3054; 0x3055; 0x3056; 0x3057; 0x3058; 0x3059; 0x305A;
+	0x305B; 0x305C; 0x305D; 0x305E; 0x305F; 0x3060; 0x3061; 0x3062;
+	0x3064; 0x3065; 0x3066; 0x3067; 0x3068; 0x3069; 0x306F; 0x3070;
+	0x3072; 0x3073; 0x3075; 0x3076; 0x3078; 0x3079; 0x307B; 0x307C;
+	0x309D; 0x309E; 0x30A6; 0x30F4; 0x30AB; 0x30AC; 0x30AD; 0x30AE;
+	0x30AF; 0x30B0; 0x30B1; 0x30B2; 0x30B3; 0x30B4; 0x30B5; 0x30B6;
+	0x30B7; 0x30B8; 0x30B9; 0x30BA; 0x30BB; 0x30BC; 0x30BD; 0x30BE;
+	0x30BF; 0x30C0; 0x30C1; 0x30C2; 0x30C4; 0x30C5; 0x30C6; 0x30C7;
+	0x30C8; 0x30C9; 0x30CF; 0x30D0; 0x30D2; 0x30D3; 0x30D5; 0x30D6;
+	0x30D8; 0x30D9; 0x30DB; 0x30DC; 0x30EF; 0x30F7; 0x30F0; 0x30F8;
+	0x30F1; 0x30F9; 0x30F2; 0x30FA; 0x30FD; 0x30FE; 0x306F; 0x3071;
+	0x3072; 0x3074; 0x3075; 0x3077; 0x3078; 0x307A; 0x307B; 0x307D;
+	0x30CF; 0x30D1; 0x30D2; 0x30D4; 0x30D5; 0x30D7; 0x30D8; 0x30DA;
+	0x30DB; 0x30DD
+|]
+
+(* Two-level bitmap read by [bitmap_test].  The first 256 bytes are indexed
+   by the high byte of a 16-bit character: 0x00 means that no character
+   with that high byte is in the set, 0xFF that all of them are, and any
+   other value n selects the n-th 32-byte bitmap (counting from 1) stored
+   after the index, with one bit per value of the low byte. *)
+let uniCharCombiningBitmap = "\
+\x00\x00\x00\x01\x02\x03\x04\x05\
+\x00\x06\x07\x08\x09\x0A\x0B\x0C\
+\x0D\x14\x00\x00\x00\x00\x00\x0E\
+\x0F\x00\x00\x00\x00\x00\x00\x00\
+\x10\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x11\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x12\x00\x00\x13\x00\
+\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
+\xFF\xFF\x00\x00\xFF\xFF\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x78\x03\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\xFE\xFF\xFB\xFF\xFF\xBB\
+\x16\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\xF8\x3F\x00\x00\x00\x01\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\xC0\xFF\x9F\x3D\x00\x00\
+\x00\x00\x02\x00\x00\x00\xFF\xFF\
+\xFF\x07\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\xC0\xFF\x01\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x0E\x00\x00\x00\x00\x00\x00\xD0\
+\xFF\x3F\x1E\x00\x0C\x00\x00\x00\
+\x0E\x00\x00\x00\x00\x00\x00\xD0\
+\x9F\x39\x80\x00\x0C\x00\x00\x00\
+\x04\x00\x00\x00\x00\x00\x00\xD0\
+\x87\x39\x00\x00\x00\x00\x03\x00\
+\x0E\x00\x00\x00\x00\x00\x00\xD0\
+\xBF\x3B\x00\x00\x00\x00\x00\x00\
+\x0E\x00\x00\x00\x00\x00\x00\xD0\
+\x8F\x39\xC0\x00\x00\x00\x00\x00\
+\x04\x00\x00\x00\x00\x00\x00\xC0\
+\xC7\x3D\x80\x00\x00\x00\x00\x00\
+\x0E\x00\x00\x00\x00\x00\x00\xC0\
+\xDF\x3D\x60\x00\x00\x00\x00\x00\
+\x0C\x00\x00\x00\x00\x00\x00\xC0\
+\xDF\x3D\x60\x00\x00\x00\x00\x00\
+\x0C\x00\x00\x00\x00\x00\x00\xC0\
+\xCF\x3D\x80\x00\x00\x00\x00\x00\
+\x0C\x00\x00\x00\x00\x00\x00\x00\
+\x00\x84\x5F\xFF\x00\x00\x0C\x00\
+\x00\x00\x00\x00\x00\x00\xF2\x07\
+\x80\x7F\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\xF2\x1B\
+\x00\x3F\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x03\x00\x00\xA0\xC2\
+\x00\x00\x00\x00\x00\x00\xFE\xFF\
+\xDF\x00\xFF\xFE\xFF\xFF\xFF\x1F\
+\x40\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\xF0\xC7\x03\
+\x00\x00\xC0\x03\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x1C\x00\x00\x00\x1C\x00\
+\x00\x00\x0C\x00\x00\x00\x0C\x00\
+\x00\x00\x00\x00\x00\x00\xF0\xFF\
+\xFF\xFF\x0F\x00\x00\x00\x00\x00\
+\x00\x38\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x02\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\xFF\xFF\xFF\x07\x00\x00\
+\x00\x00\x00\x00\x00\xFC\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x06\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x40\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\xFF\xFF\x00\x00\x0F\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\xFE\xFF\x3F\x00\
+\x00\x00\x00\x00\x00\xFF\xFF\xFF\
+\x07\x00\x00\x00\x00\x00\x00\x00"
+
+(****)
+
+(* [bitmap_test base bitmap character] tells whether [character] belongs to
+   the set described by the two-level table [bitmap].  Characters below
+   [base] or outside the 16-bit range are never members.  Otherwise the
+   byte indexed by the high byte of the character is consulted: 0xFF means
+   every character of that page is a member, 0 means none is, and any other
+   value selects a 32-byte page (stored after the 256-byte index) in which
+   the low byte of the character picks one bit. *)
+let bitmap_test base bitmap character =
+  if character < base || character >= 0x10000 then false else begin
+    let page = get bitmap ((character lsr 8) land 0xFF) in
+    if page = 0xFF then true
+    else if page = 0 then false
+    else begin
+      let cell =
+        get bitmap ((page - 1) * 32 + 256 + (character land 0xFF) / 8) in
+      cell land (1 lsl (character land 7)) <> 0
+    end
+  end
+
+(* Membership test in [uniCharCombiningBitmap]; characters below 0x0300 are
+   rejected without consulting the table. *)
+let unicode_combinable character =
+  let lowest_candidate = 0x0300 in
+  bitmap_test lowest_candidate uniCharCombiningBitmap character
+
+(* Binary search for the key [v] among the pairs numbered [i] to [j - 1] of
+   the flat array [t], where pair number k occupies [t.(2 * k)] (the key)
+   and [t.(2 * k + 1)] (the value).  Keys must be sorted in increasing
+   order and the range must not be empty.  Returns the value associated
+   with [v], or 0 when [v] is not present. *)
+let rec find_rec t i j v =
+  if i + 1 <> j then begin
+    let mid = (i + j) / 2 in
+    let (lo, hi) = if v < t.(mid * 2) then (i, mid) else (mid, j) in
+    find_rec t lo hi v
+  end else if t.(i * 2) = v then
+    t.(i * 2 + 1)
+  else
+    0
+
+(* [find t i n v] looks the key [v] up among the [n] pairs of [t] starting
+   at pair number [i] (see [find_rec] for the layout).  Keys outside the
+   range spanned by the first and last pair are rejected up front; the
+   result is the associated value, or 0 when there is none. *)
+let find t i n v =
+  if v >= t.(2 * i) && v <= t.(2 * (i + n - 1)) then
+    find_rec t i (i + n) v
+  else
+    0
+
+(* Number of (key, value) pairs in [uniCharPrecompSourceTable]; each pair
+   takes two consecutive array elements, hence the division by two. *)
+let uniCharPrecompSourceTableLen = Array.length uniCharPrecompSourceTable / 2
+
+(* [combine v v'] returns the single character equivalent to [v] followed
+   by the combining character [v'], or 0 when the two do not compose.
+
+   When [v'] lies between [hangul_vbase] (included) and
+   [hangul_tbase + hangul_tcount] (excluded) the result is computed
+   arithmetically from the [hangul_*] constants, which are defined earlier
+   in the file (outside this excerpt; judging by the names, the Hangul jamo
+   and syllable ranges).  Otherwise [v'] is looked up in
+   [uniCharPrecompSourceTable], which yields the slice of
+   [uniCharBMPPrecompDestinationTable] to search for [v]. *)
+let combine v v' =
+  if v' < hangul_vbase || v' >= hangul_tbase + hangul_tcount then begin
+    (* Table-driven case: the low 16 bits of the entry give the first pair
+       of the slice, the high 16 bits the number of pairs. *)
+    match find uniCharPrecompSourceTable 0 uniCharPrecompSourceTableLen v' with
+      0 -> 0
+    | k -> find uniCharBMPPrecompDestinationTable (k land 0xFFFF) (k lsr 16) v
+  end else if
+    v' < hangul_vbase + hangul_vcount &&
+    v >= hangul_lbase && v < hangul_lbase + hangul_lcount
+  then begin
+    (* [v] in the L range followed by [v'] in the V range. *)
+    let l_index = v - hangul_lbase in
+    let v_index = v' - hangul_vbase in
+    hangul_sbase + (l_index * (hangul_vcount * hangul_tcount)) +
+                   (v_index * hangul_tcount)
+  end else if
+    v' > hangul_tbase &&
+    v >= hangul_sbase && v < hangul_sbase + hangul_scount
+  then begin
+    (* [v] in the S range followed by [v'] strictly above [hangul_tbase]:
+       only allowed when [v] sits on a multiple of [hangul_tcount]. *)
+    if (v - hangul_sbase) mod hangul_tcount = 0 then
+      v + v' - hangul_tbase
+    else
+      0
+  end else
+    0
+
+(****)
+
+(* Composition of the UTF-8 string [s] (of length [l]), performed in place.
+
+   [scan d s i l] decodes the character starting at byte [i] and passes it
+   to [cont].  [d] is a pair (i1, i2): the bytes before i1 are the output
+   produced so far, and the bytes from i2 up to the read position have been
+   accepted unchanged but not yet moved down to i1.  When the input is
+   exhausted the pending bytes are moved and the output prefix is returned.
+
+   Malformed input is reported through [fail ()], which is defined earlier
+   in the file (not shown here); [compose] recovers from [Invalid], so it
+   presumably raises that exception.
+
+   Every trailing byte is checked individually for the 10xxxxxx pattern.
+   The former test, (c1 lor c2) land 0xc0 = 0x80, only established that no
+   trailing byte had bit 6 set and that at least one had bit 7 set, so a
+   sequence such as E1 80 2F was accepted and swallowed the ASCII byte. *)
+let rec scan d s i l =
+  if i < l then begin
+    let c = get s i in
+    if c < 0x80 then
+      cont d s i l (i + 1) c
+    else if c < 0xE0 then begin
+      (* 80 - 7FF *)
+      if c < 0xc2 || i + 1 >= l then fail () else
+      let c1 = get s (i + 1) in
+      if c1 land 0xc0 <> 0x80 then fail () else
+      let v = c lsl 6 + c1 - 0x3080 in
+      cont d s i l (i + 2) v
+    end else if c < 0xF0 then begin
+      (* 800 - FFFF *)
+      if i + 2 >= l then fail () else
+      let c1 = get s (i + 1) in
+      let c2 = get s (i + 2) in
+      if c1 land 0xc0 <> 0x80 || c2 land 0xc0 <> 0x80 then fail () else
+      let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
+      (* Reject overlong encodings. *)
+      if v < 0x800 then fail () else
+      cont d s i l (i + 3) v
+    end else begin
+      (* 10000 - 10FFFF *)
+      if i + 3 >= l then fail () else
+      let c1 = get s (i + 1) in
+      let c2 = get s (i + 2) in
+      let c3 = get s (i + 3) in
+      if
+        c1 land 0xc0 <> 0x80 || c2 land 0xc0 <> 0x80 || c3 land 0xc0 <> 0x80
+      then fail () else
+      let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
+      (* Reject overlong encodings and values above the Unicode range. *)
+      if v < 0x10000 || v > 0x10ffff then fail () else
+      cont d s i l (i + 4) v
+    end
+  end else begin
+    (* End of input: move the pending bytes down and cut the string. *)
+    let (i1, i2) = d in
+    String.blit s i2 s i1 (l - i2);
+    String.sub s 0 (i1 + l - i2)
+  end
+
+(* [cont d s i l j v'] handles the character [v'] that occupies bytes [i] to
+   [j - 1].  If [v'] is a combining character, it is tried against the
+   character that precedes it in [s]; on success the pending bytes are moved
+   down, any further combining characters are absorbed by [compose_rec],
+   and the composed character is written at the output position. *)
+and cont d s i l j v' =
+  if unicode_combinable v' then begin
+    (* From here on [i] is the start of the preceding character. *)
+    let i = prev_char s i in
+    let (v, _) = decode_char s i l in
+    let v'' = combine v v' in
+    if v'' = 0 then
+      scan d s j l
+    else begin
+      let (i1, i2) = d in
+      String.blit s i2 s i1 (i - i2);
+      let i1 = i1 + i - i2 in
+      let (v'', i) = compose_rec s j l v'' in
+      let i1 = encode_char s i1 l v'' in
+      scan (i1, i) s i l
+    end
+  end else
+    scan d s j l
+
+(* [compose_rec s i l v] keeps combining [v] with the characters found from
+   byte [i] on, for as long as they are combining characters that compose
+   with it.  Returns the resulting character together with the position of
+   the first byte that was not absorbed.  A decoding error simply stops the
+   process. *)
+and compose_rec s i l v =
+  try
+    let (v', j) = decode_char s i l in
+    if unicode_combinable v' then begin
+      let v'' = combine v v' in
+      if v'' = 0 then
+        (v, i)
+      else
+        compose_rec s j l v''
+    end else
+      (v, i)
+  with Invalid ->
+    (v, i)
+
+(* Returns the composed form of [s].  The work is done on a private copy;
+   if the input turns out to be malformed ([Invalid]), [s] itself is
+   returned unchanged. *)
+let compose s =
+  let len = String.length s in
+  let work = String.copy s in
+  try scan (0, 0) work 0 len with Invalid -> s
+
+(***)
+
+(* Stores [v] at offsets [i] and [i + 1] of [s], low-order byte first. *)
+let set_2 s i v =
+  let low = v land 0xff and high = v lsr 8 in
+  set s i low;
+  set s (i + 1) high
+
+(* Reads the two-byte value stored low-order byte first at offsets [i] and
+   [i + 1] of [s]. *)
+let get_2 s i =
+  let high = get s (i + 1) in
+  let low = get s i in
+  high lsl 8 + low
+
+(* UTF-8 to UTF-16 conversion loop.  [s] is the UTF-8 input of length [l],
+   read from byte [i]; [s'] is the output buffer, written from byte [j].
+   Each decoded character is handed to [cont]; when the input is exhausted
+   the first [j] bytes of [s'] are returned.
+
+   Malformed input is reported through [fail ()], which is defined earlier
+   in the file (not shown here).
+
+   Every trailing byte is checked individually for the 10xxxxxx pattern.
+   The former test, (c1 lor c2) land 0xc0 = 0x80, only established that no
+   trailing byte had bit 6 set and that at least one had bit 7 set, so a
+   sequence such as E1 80 2F was accepted and swallowed the ASCII byte. *)
+let rec scan s' j s i l =
+  if i < l then begin
+    let c = get s i in
+    if c < 0x80 then
+      cont s' j s (i + 1) l c
+    else if c < 0xE0 then begin
+      (* 80 - 7FF *)
+      if c < 0xc2 || i + 1 >= l then fail () else
+      let c1 = get s (i + 1) in
+      if c1 land 0xc0 <> 0x80 then fail () else
+      let v = c lsl 6 + c1 - 0x3080 in
+      cont s' j s (i + 2) l v
+    end else if c < 0xF0 then begin
+      (* 800 - FFFF *)
+      if i + 2 >= l then fail () else
+      let c1 = get s (i + 1) in
+      let c2 = get s (i + 2) in
+      if c1 land 0xc0 <> 0x80 || c2 land 0xc0 <> 0x80 then fail () else
+      let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
+      (* Reject overlong encodings. *)
+      if v < 0x800 then fail () else
+      cont s' j s (i + 3) l v
+    end else begin
+      (* 10000 - 10FFFF *)
+      if i + 3 >= l then fail () else
+      let c1 = get s (i + 1) in
+      let c2 = get s (i + 2) in
+      let c3 = get s (i + 3) in
+      if
+        c1 land 0xc0 <> 0x80 || c2 land 0xc0 <> 0x80 || c3 land 0xc0 <> 0x80
+      then fail () else
+      let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
+      (* Reject overlong encodings and values above the Unicode range. *)
+      if v < 0x10000 || v > 0x10ffff then fail () else
+      cont s' j s (i + 4) l v
+    end
+  end else
+    String.sub s' 0 j
+
+(* Writes the character [v] at offset [j] of [s'] and resumes scanning at
+   byte [i] of [s].  Characters below 0x10000 take one 16-bit unit; the
+   others are split into a surrogate pair (0xD800 + high ten bits, then
+   0xDC00 + low ten bits).  Units are stored with [set_2]. *)
+and cont s' j s i l v =
+  if v < 0x10000 then begin
+    set_2 s' j v;
+    scan s' (j + 2) s i l
+  end else begin
+   let v = v - 0x10000 in
+   set_2 s' j (v lsr 10 + 0xD800);
+   set_2 s' (j + 2) (v land 0x3FF + 0xDC00);
+   scan s' (j + 4) s i l
+  end
+
+(* Converts the UTF-8 string [s] to UTF-16.  The output buffer is given
+   twice the size of the input, which the conversion loop never exceeds:
+   each input byte produces at most two output bytes. *)
+let to_utf_16 s =
+  let len = String.length s in
+  scan (String.create (2 * len)) 0 s 0 len
+
+(****)
+
+(* UTF-16 to UTF-8 conversion loop.  [s] is the UTF-16 input of length [l]
+   (16-bit units read with [get_2]), consumed from byte [i]; [s'] is the
+   output buffer of size [l'], filled from byte [i'] by [encode_char]
+   (defined earlier in the file), whose result is used as the next output
+   offset.
+
+   A unit outside 0xD800-0xDFFF is a character by itself.  A unit in
+   0xD800-0xDBFF must be followed by a unit in 0xDC00-0xDFFF, the two
+   forming one character above 0xFFFF.  Anything else, including a dangling
+   odd byte at the end of the input, goes through [fail ()]. *)
+let rec scan s' i' l' s i l =
+  if i + 2 > l then begin
+    if i < l then fail () else String.sub s' 0 i'
+  end else begin
+    let unit1 = get_2 s i in
+    if unit1 < 0xD800 || unit1 > 0xDFFF then
+      let next = encode_char s' i' l' unit1 in
+      scan s' next l' s (i + 2) l
+    else if unit1 >= 0xDC00 || i + 4 > l then
+      fail ()
+    else begin
+      let unit2 = get_2 s (i + 2) in
+      if unit2 < 0xDC00 || unit2 > 0xDFFF then fail () else
+      let code = (unit1 - 0xD800) lsl 10 + (unit2 - 0xDC00) + 0x10000 in
+      let next = encode_char s' i' l' code in
+      scan s' next l' s (i + 4) l
+    end
+  end
+
+(* Converts the UTF-16 string [s] to UTF-8.  The output buffer is sized at
+   three bytes for every two input bytes. *)
+let from_utf_16 s =
+  let len = String.length s in
+  let capacity = 3 * len / 2 in
+  scan (String.create capacity) 0 capacity s 0 len
+
+(****)
+
+(* [scan s i l] is true when the bytes of [s] from offset [i] up to [l]
+   (excluded) form a sequence of well-formed UTF-8 characters: no stray or
+   missing continuation byte, no overlong encoding, nothing above 0x10FFFF.
+
+   Two checks differ from the first version of this function:
+   - every trailing byte is tested individually for the 10xxxxxx pattern;
+     (c1 lor c2) land 0xc0 = 0x80 also held when one of the bytes was below
+     0x80, so that for instance E1 80 2F was reported as valid;
+   - the upper bound is inclusive, so that U+10FFFF (F4 8F BF BF) is
+     accepted, as it is by the other decoders of this file. *)
+let rec scan s i l =
+  i = l ||
+  let c = get s i in
+  if c < 0x80 then
+    scan s (i + 1) l
+  else if c < 0xE0 then begin
+    (* 80 - 7FF *)
+    c >= 0xc2 && i + 1 < l &&
+    let c1 = get s (i + 1) in
+    c1 land 0xc0 = 0x80 &&
+    scan s (i + 2) l
+  end else if c < 0xF0 then begin
+    (* 800 - FFFF *)
+    i + 2 < l &&
+    let c1 = get s (i + 1) in
+    let c2 = get s (i + 2) in
+    c1 land 0xc0 = 0x80 && c2 land 0xc0 = 0x80 &&
+    let v = c lsl 12 + c1 lsl 6 + c2 - 0xe2080 in
+    (* Reject overlong encodings. *)
+    v >= 0x800 &&
+    scan s (i + 3) l
+  end else begin
+    (* 10000 - 10FFFF *)
+    i + 3 < l &&
+    let c1 = get s (i + 1) in
+    let c2 = get s (i + 2) in
+    let c3 = get s (i + 3) in
+    c1 land 0xc0 = 0x80 && c2 land 0xc0 = 0x80 && c3 land 0xc0 = 0x80 &&
+    let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
+    (* Reject overlong encodings and values above the Unicode range. *)
+    v >= 0x10000 && v <= 0x10ffff &&
+    scan s (i + 4) l
+  end
+
+(* True when the whole of [s] passes the UTF-8 validation performed by
+   [scan]. *)
+let check_utf_8 s =
+  let len = String.length s in
+  scan s 0 len

Added: trunk/src/unicode.mli
===================================================================
--- trunk/src/unicode.mli	                        (rev 0)
+++ trunk/src/unicode.mli	2009-05-04 18:48:23 UTC (rev 326)
@@ -0,0 +1,23 @@
+(* Unison file synchronizer: src/unicode.mli *)
+(* Copyright 1999-2009, Benjamin C. Pierce (see COPYING for details) *)
+
+
+(* Case-insensitive comparison.  If two strings are equal according to
+   Mac OS X (Darwin, actually, but the algorithm has hopefully
+   remained unchanged) or Windows (Samba), then this function returns 0 *)
+val compare : string -> string -> int
+
+(* Corresponding normalization *)
+val normalize : string -> string
+
+(* Compose Unicode strings.  This reverts the decomposition performed
+   by Mac OS X. *)
+val compose : string -> string
+
+(* Convert to and from little-endian UTF-16 encoding *)
+(*XXX What about null-termination? *)
+val to_utf_16 : string -> string
+val from_utf_16 : string -> string
+
+(* Check whether the string contains only well-formed UTF-8 characters *)
+val check_utf_8 : string -> bool

Added: trunk/src/unicode_tables.ml
===================================================================
--- trunk/src/unicode_tables.ml	                        (rev 0)
+++ trunk/src/unicode_tables.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -0,0 +1,11 @@
+(*-*-coding: utf-8;-*-*)
+let ascii_lower =
+  "\000\001\002\003\004\005\006\007\008\t\n\011\012\013\014\015\016\017\018\019\020\021\022\023\024\025\026\027\028\029\030\031 !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127"
+let norm_repl =
+  "\003aÌ€\003á\003aÌ‚\003ã\003ä\003aÌŠ\002æ\003ç\003eÌ€\003é\003eÌ‚\003ë\003iÌ€\003í\003iÌ‚\003ï\002ð\003ñ\003oÌ€\003ó\003oÌ‚\003õ\003ö\002ø\003uÌ€\003ú\003uÌ‚\003ü\003ý\002þ\003ÿ\003aÌ„\003ă\003ą\003ć\003cÌ‚\003ċ\003cÌŒ\003dÌŒ\002Ä‘\003eÌ„\003ĕ\003ė\003ę\003eÌŒ\003gÌ‚\003ğ\003ġ\003ģ\003hÌ‚\002ħ\003ĩ\003iÌ„\003ĭ\003į\003i̇\002ij\003jÌ‚\003ķ\003ĺ\003ļ\003lÌŒ\002Å€\002Å‚\003ń\003ņ\003nÌŒ\002Å‹\003oÌ„\003ŏ\003oÌ‹\002Å“\003ŕ\003ŗ\003rÌŒ\003ś\003sÌ‚\003ş\003sÌŒ\003ţ\003tÌŒ\002ŧ\003ũ\003uÌ„\003ŭ\003uÌŠ\003uÌ‹\003ų\003wÌ‚\003yÌ‚\003ź\003ż\003zÌŒ\002É“\002ƃ\002Æ…\002É”\002ƈ\002É–\002É—\002ÆŒ\002ǝ\002É™\002É›\002Æ’\002É \002É£\002É©\002ɨ\002Æ™\002ɯ\002ɲ\002ɵ\003oÌ›\002Æ£\002Æ¥\002ƨ\002ʃ\002Æ­\002ʈ\003uÌ›\002ÊŠ\002Ê‹\002Æ´\002ƶ\002Ê’\002ƹ\002ƽ\002dž\002lj\002ÇŒ\003aÌŒ\003iÌŒ\003oÌŒ\003uÌŒ\005ǖ\005ǘ\005ǚ\005ǜ\005ǟ\005ǡ\004ǣ\002Ç¥\003gÌŒ\003kÌŒ\003oÌ!
 ¨\005ǭ\004Ê’ÌŒ\003jÌŒ\002dz\003ǵ\003nÌ€\005ǻ\004ǽ\004ǿ\003ȁ\003aÌ‘\003ȅ\003eÌ‘\003ȉ\003iÌ‘\003ȍ\003oÌ‘\003ȑ\003rÌ‘\003ȕ\003uÌ‘\003ș\003ț\003hÌŒ\003ȧ\003ȩ\005ȫ\005ȭ\003ȯ\005ȱ\003yÌ„\002Ì€\002́\002Ì“\004̈́\002ʹ\001;\004΅\004ά\002·\004έ\004ή\004ί\004ό\004ύ\004ώ\006ΐ\002α\002β\002γ\002δ\002ε\002ζ\002η\002θ\002ι\002κ\002λ\002μ\002ν\002ξ\002ο\002Ï€\002ρ\002σ\002Ï„\002Ï…\002φ\002χ\002ψ\002ω\004ϊ\004ϋ\006ΰ\004ϓ\004ϔ\002Ï£\002Ï¥\002ϧ\002Ï©\002Ï«\002Ï­\002ϯ\004ѐ\004ё\002Ñ’\004ѓ\002Ñ”\002Ñ•\002Ñ–\004ї\002ј\002Ñ™\002Ñš\002Ñ›\004ќ\004ѝ\004ў\002ÑŸ\002а\002б\002в\002г\002д\002е\002ж\002з\002и\004й\002к\002л\002м\002н\002о\002п\002Ñ€\002с\002Ñ‚\002у\002Ñ„\002Ñ…\002ц\002ч\002ш\002щ\002ÑŠ\002Ñ‹\002ÑŒ\002э\002ÑŽ\002я\002Ñ¡\002Ñ£\002Ñ¥\002ѧ\002Ñ©\002Ñ«\002Ñ­\002ѯ\002ѱ\002ѳ\002ѵ\004ѷ\002ѹ\002Ñ»\002ѽ\002Ñ¿\002ҁ\002Ò!
 ‘\002Ò“\002Ò•\002Ò—\002Ò™\002Ò›\002ҝ\002ÒŸ\002Ò¡\002Ò£\002Ò¥\!
 002Ò§\00
2ҩ\002ҫ\002ҭ\002ү\002ұ\002ҳ\002ҵ\002ҷ\002ҹ\002һ\002ҽ\002ҿ\004ӂ\002ӄ\002ӈ\002ӌ\004ӑ\004ӓ\002ӕ\004ӗ\002ә\004ӛ\004ӝ\004ӟ\002ӡ\004ӣ\004ӥ\004ӧ\002ө\004ӫ\004ӭ\004ӯ\004ӱ\004ӳ\004ӵ\004ӹ\002ա\002բ\002գ\002դ\002ե\002զ\002է\002ը\002թ\002ժ\002ի\002լ\002խ\002ծ\002կ\002հ\002ձ\002ղ\002ճ\002մ\002յ\002ն\002շ\002ո\002չ\002պ\002ջ\002ռ\002ս\002վ\002տ\002ր\002ց\002ւ\002փ\002ք\002օ\002ֆ\004آ\004أ\004ؤ\004إ\004ئ\004ۀ\004ۂ\004ۓ\006ऩ\006ऱ\006ऴ\006क़\006ख़\006ग़\006ज़\006ड़\006ढ़\006फ़\006य़\006ো\006ৌ\006ড়\006ঢ়\006য়\006ਲ਼\006ਸ਼\006ਖ਼\006ਗ਼\006ਜ਼\006ਫ਼\006ୈ\006ୋ\006ୌ\006ଡ଼\006ଢ଼\006ஔ\006ொ\006ோ\006ௌ\006ై\006ೀ\006ೇ\006ೈ\006ೊ\009ೋ\006ൊ\006ോ\006ൌ\006ේ\006ො\009ෝ\006ෞ\006!
 གྷ\006ཌྷ\006དྷ\006བྷ\006ཛྷ\006ཀྵ\006ཱི\006ཱུ\006ྲྀ\006ླྀ\006ཱྀ\006ྒྷ\006ྜྷ\006ྡྷ\006ྦྷ\006ྫྷ\006ྐྵ\006ဦ\003ა\003ბ\003გ\003დ\003ე\003ვ\003ზ\003თ\003ი\003კ\003ლ\003მ\003ნ\003ო\003პ\003ჟ\003რ\003ს\003ტ\003უ\003ფ\003ქ\003ღ\003ყ\003შ\003ჩ\003ც\003ძ\003წ\003ჭ\003ხ\003ჯ\003ჰ\003ჱ\003ჲ\003ჳ\003ჴ\003ჵ\003ḁ\003ḃ\003ḅ\003ḇ\005ḉ\003ḋ\003ḍ\003ḏ\003ḑ\003ḓ\005ḕ\005ḗ\003ḙ\003ḛ\005ḝ\003ḟ\003ḡ\003ḣ\003ḥ\003ḧ\003ḩ\003ḫ\003ḭ\005ḯ\003ḱ\003ḳ\003ḵ\003ḷ\005ḹ\003ḻ\003ḽ\003ḿ\003ṁ\003ṃ\003ṅ\003ṇ\003ṉ\003ṋ\005ṍ\005ṏ\005ṑ\005ṓ\003ṕ\003ṗ\003ṙ\003ṛ\005ṝ\003ṟ\003ṡ\003ṣ\005ṥ\005ṧ\005ṩ\003ṫ\003ṭ\003ṯ\003ṱ\003ṳ\003ṵ\003ṷ\005ṹ\005ṻ\003ṽ\003ṿ\003ẁ\003ẃ\003ẅ\003ẇ\003ẉ\003ẋ\003ẍ\003ẏ\003ẑ\00!
 3ẓ\003ẕ\003ẖ\003ẗ\003ẘ\003ẙ\004ẛ\003ạ\003ả\!
 005ấ
\005aÌ‚Ì€\005ẩ\005ẫ\005ậ\005ắ\005ằ\005ẳ\005ẵ\005ặ\003eÌ£\003ẻ\003ẽ\005ế\005eÌ‚Ì€\005ể\005ễ\005ệ\003ỉ\003iÌ£\003oÌ£\003ỏ\005ố\005oÌ‚Ì€\005ổ\005ỗ\005ộ\005ớ\005ờ\005ở\005ỡ\005ợ\003uÌ£\003ủ\005ứ\005ừ\005ử\005ữ\005ự\003yÌ€\003yÌ£\003ỷ\003ỹ\004ἀ\004ἁ\006ἂ\006ἃ\006ἄ\006ἅ\006ἆ\006ἇ\004ἐ\004ἑ\006ἒ\006ἓ\006ἔ\006ἕ\004ἠ\004ἡ\006ἢ\006ἣ\006ἤ\006ἥ\006ἦ\006ἧ\004ἰ\004ἱ\006ἲ\006ἳ\006ἴ\006ἵ\006ἶ\006ἷ\004ὀ\004ὁ\006ὂ\006ὃ\006ὄ\006ὅ\004Ï…Ì“\004Ï…Ì”\006Ï…Ì“Ì€\006ὓ\006ὔ\006ὕ\006Ï…Ì“Í‚\006ὗ\004ὠ\004ὡ\006ὢ\006ὣ\006ὤ\006ὥ\006ὦ\006ὧ\004ὰ\004ὲ\004ὴ\004ὶ\004ὸ\004Ï…Ì€\004ὼ\006ᾀ\006ᾁ\008ᾂ\008ᾃ\008ᾄ\008ᾅ\008ᾆ\008ἁÍ!
 ‚Í…\006ᾐ\006ᾑ\008ᾒ\008ᾓ\008ᾔ\008ᾕ\008ᾖ\008ᾗ\006ᾠ\006ᾡ\008ᾢ\008ᾣ\008ᾤ\008ᾥ\008ᾦ\008ᾧ\004ᾰ\004ᾱ\006ᾲ\004ᾳ\006ᾴ\004ᾶ\006ᾷ\004῁\006ῂ\004ῃ\006ῄ\004ῆ\006ῇ\005῍\005῎\005῏\004ῐ\004ῑ\006ῒ\004ῖ\006ῗ\005῝\005῞\005῟\004ῠ\004Ï…Ì„\006ῢ\004ῤ\004ῥ\004Ï…Í‚\006ῧ\004῭\001`\006ῲ\004ῳ\006ῴ\004ῶ\006ῷ\002´\000\003â…°\003â…±\003â…²\003â…³\003â…´\003â…µ\003â…¶\003â…·\003â…¸\003â…¹\003â…º\003â…»\003â…¼\003â…½\003â…¾\003â…¿\003ⓐ\003â“‘\003â“’\003â““\003â“”\003â“•\003â“–\003â“—\003ⓘ\003â“™\003â“š\003â“›\003â“œ\003ⓝ\003â“ž\003â“Ÿ\003â“ \003â“¡\003â“¢\003â“£\003ⓤ\003â“¥\003ⓦ\003ⓧ\003ⓨ\003â“©\006が\006ぎ\006ぐ\006げ\006ご\006ざ\006じ\006ず\006ぜ\006ぞ\006だ\006ぢ\006づ\006で\006ど\006ば\006!
 ぱ\006び\006ぴ\006ぶ\006ぷ\006べ\006ã!
 ¸ã‚š\006
ぼ\006ぽ\006ゔ\006ゞ\006ガ\006ギ\006グ\006ゲ\006ゴ\006ザ\006ジ\006ズ\006ゼ\006ゾ\006ダ\006ヂ\006ヅ\006デ\006ド\006バ\006パ\006ビ\006ピ\006ブ\006プ\006ベ\006ペ\006ボ\006ポ\006ヴ\006ヷ\006ヸ\006ヹ\006ヺ\006ヾ\004יִ\004ײַ\004שׁ\004שׂ\006שּׁ\006שּׂ\004אַ\004אָ\004אּ\004בּ\004גּ\004דּ\004הּ\004וּ\004זּ\004טּ\004יּ\004ךּ\004כּ\004לּ\004מּ\004נּ\004סּ\004ףּ\004פּ\004צּ\004קּ\004רּ\004שּ\004תּ\004וֹ\004בֿ\004כֿ\004פֿ\003a\003b\003c\003d\003e\003f\003g\003h\003i\003j\003k\003l\003m\003n\003o\003p\003q\003r\003s\003t\003u\003v\003w\003x\003y\003z"
+let norm_prim =


 000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\00!
 0\000\00

 \001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\052!
 \053\000\000\000\000\000\000\000\000\000\000\000\000\000\054\0!
 55\000\0
00\000"
+let norm_second_high =


 004\004\004\004\004\004\004\000\000\000\000\000\000\000\000\00!
 0\000\00


 \000\000\000\000\006\006\006\006\006\006\007\007\007\007\007\0!
 07\007\0


 0\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\!
 000\000\


 00\000\000\000\000\000\000\008\000\008\008\000\009\000\000\000!
 \000\000


 012\012\012\012\012\012\012\012\012\012\012\012\012\012\000\00!
 0\000\00

 \000\015\015\013\004\013\004\015\015\000\000\000\000\000\000\000\000\000\000\000\000\000\000\015\015\015\015\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\015\015\015\015\015\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\015\015\015\015\015\015\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\015\015\015\015\015\015\015\015\015\015\015\015\015\015\015\015\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000!
 \000\000\000\000\000\000\000\000\000\000\015\015\015\015\015\0!
 15\015\0


 8\018\018\018\018\018\018\018\018\018\018\018\018\018\018\018\!
 000\000\
000\000\000"
+let norm_second_low =

 225\225\000\229\229\233\233\237\237\241\000\244\000\247\247\251\251\255\255\000\003\000\006\006\010\010\014\014\018\000\021\021\025\025\029\029\033\033\037\037\041\041\045\045\049\049\053\053\057\000\060\060\064\064\068\068\072\072\076\076\080\080\084\084\088\088\116\092\092\096\096\100\100\000\000\104\107\000\110\000\113\116\000\119\122\125\000\000\128\131\134\137\000\140\143\000\146\149\152\000\000\000\155\158\000\161\164\164\168\000\171\000\000\174\000\177\000\000\180\000\183\186\186\190\193\196\000\199\000\202\205\000\000\000\208\000\000\000\000\000\000\000\211\211\000\214\214\000\217\217\000\220\220\224\224\228\228\232\232\236\236\242\242\248\248\254\254\000\004\004\010\010\016\016\021\000\024\024\028\028\032\032\036\036\042\042\047\051\051\000\054\054\000\000\058\058\062\062\068\068\073\073\078\078\082\082\086\086\090\090\094\094\098\098\102\102\106\106\110\110\114\114\118\118\122\122\126\126\130\130\000\000\134\134\000\000\000\000\000\000\138\138\142\142\146\146\152\!
 152\158\158\162\162\168\168\000\000\000\000\000\000\000\000\00!
 0\000\00
0\000\172\175\000\178\181\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\186\000\000\000\000\000\000\000\000\000\189\000\000\000\000\000\000\191\196\201\204\209\214\000\219\000\224\229\234\241\244\247\250\253\000\003\006\009\012\015\018\021\024\027\030\033\000\036\039\042\045\048\051\054\057\062\196\204\209\214\067\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\036\000\000\000\000\000\000\000\057\062\219\224\229\000\000\000\000\074\079\000\000\000\000\000\000\000\000\000\000\000\000\000\084\000\087\000\090\000\093\000\096\000\099\000\102\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\105\110\115\118\123\126\129\132\137\140\143\146\149\154\159\164\167\170\173\176\179\182\185\188\191\194\199\202\205\208\211\214\217\220\223\226\229\232\235\238\241\244\247\250\253\000\003\006\000\000\000\000\000\000!

 \000\000\000\000\240\243\246\249\252\255\002\005\008\011\014\0!
 17\020\0


 0\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\!
 000\000\


 00\000\000\000\000\000\000\238\000\245\252\000\003\000\000\000!
 \000\000

 000\000\000\000\000\000\000\000\000\000\000\218\218\222\222\226\226\230\230\234\234\240\240\244\244\248\248\252\252\000\000\004\004\010\010\016\016\020\020\024\024\030\030\034\034\038\038\042\042\046\046\050\050\054\054\058\058\062\062\068\068\072\072\076\076\080\080\084\084\090\090\094\094\098\098\102\102\106\106\110\110\114\114\118\118\122\122\126\126\132\132\138\138\144\144\150\150\154\154\158\158\162\162\166\166\172\172\176\176\180\180\184\184\190\190\196\196\202\202\206\206\210\210\214\214\218\218\222\222\226\226\230\230\236\236\242\242\246\246\250\250\254\254\002\002\006\006\010\010\014\014\018\018\022\022\026\026\030\030\034\034\038\042\046\050\000\054\000\000\000\000\059\059\063\063\067\067\073\073\079\079\085\085\091\091\097\097\103\103\109\109\115\115\121\121\127\127\131\131\135\135\139\139\145\145\151\151\157\157\163\163\169\169\173\173\177\177\181\181\185\185\191\191\197\197\203\203\209\209\215\215\221\221\227\227\233\233\239\239\245\245\249\249\253\253\003\003\!
 009\009\015\015\021\021\027\027\031\031\035\035\039\039\000\00!
 0\000\00


 \000\000\000\000\000\000\000\000\000\000\137\141\145\149\153\1!
 57\161\1

 3\000\000\100\107\114\121\000\000\000\128\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\135\000\140\000\000\000\000\000\000\000\000\000\000\145\150\155\162\169\174\179\184\189\194\199\204\209\000\214\219\224\229\234\000\239\000\244\249\000\254\003\000\008\013\018\023\028\033\038\043\048\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\072\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\053\057\061\065\069\073\077\081\085\089\09!
 3\097\101\105\109\113\117\121\125\129\133\137\141\145\149\153\!
 000\000\
000\000\000"

Modified: trunk/src/update.ml
===================================================================
--- trunk/src/update.ml	2009-05-03 00:20:31 UTC (rev 325)
+++ trunk/src/update.ml	2009-05-04 18:48:23 UTC (rev 326)
@@ -34,11 +34,13 @@
    representation does not change between unison versions.) *)
 (*FIX: Use similar_correct in props.ml next time the
   format is modified (see file props.ml for the new function) *)
-(*FIX: use Case.normalize next time the format is modified *)
 (*FIX: also change Fileinfo.stamp to drop the info.ctime component, next time the
   format is modified *)
 (*FIX: also make Jerome's suggested change about file times (see his mesg in
        unison-pending email folder). *)
+(*FIX: one should also store whether we are in case-insensitive mode
+  in the archive and check the mode has not changed when the archive
+  is loaded *)
 let archiveFormat = 22
 
 module NameMap = MyMap.Make (Name)
@@ -1185,20 +1187,8 @@
 
    Note that case conflicts and illegal filenames can only occur under Unix,
    when syncing with a Windows file system. *)
-let badWindowsFilenameRx =
-  (* FIX: This should catch all device names (like aux, con, ...).  I don't
-     know what all the possible device names are. *)
-  Rx.case_insensitive
-    (Rx.rx "\\.*|aux|con|lpt1|prn|(.*[\000-\031\\/<>:\"|].*)")
+let badFilename s = Name.bad (Prefs.read Globals.someHostIsRunningWindows) s (* delegates to Name.bad; flag = some host runs Windows *)
 
-let isBadWindowsFilename s =
-  (* FIX: should also check for a max filename length, not sure how much *)
-  Rx.match_string badWindowsFilenameRx (Name.toString s)
-let badFilename s =
-  (* Don't check unless we are syncing with Windows *)
-  Prefs.read Globals.someHostIsRunningWindows &&
-  isBadWindowsFilename s
-
 let getChildren fspath path =
   let children =
     (* We sort them in reverse order, as findDuplicate will reverse



More information about the Unison-hackers mailing list