[Unison-hackers] [unison-svn] r423 - in trunk/src: . system

vouillon@seas.upenn.edu vouillon at seas.upenn.edu
Thu Mar 25 10:43:01 EDT 2010


Author: vouillon
Date: 2010-03-25 10:43:01 -0400 (Thu, 25 Mar 2010)
New Revision: 423

Modified:
   trunk/src/Makefile.OCaml
   trunk/src/RECENTNEWS
   trunk/src/case.ml
   trunk/src/fileinfo.ml
   trunk/src/globals.ml
   trunk/src/mkProjectInfo.ml
   trunk/src/props.ml
   trunk/src/props.mli
   trunk/src/system/system_win.ml
   trunk/src/transfer.ml
   trunk/src/uicommon.ml
   trunk/src/unicode.ml
Log:
* Windows: more fixes for compilation with MSVC
* Rsync: somewhat faster compressor
* Changed "fat" option to not use chmod, to be on the safe side
  (perms = 0 + dontchmod = true)
* Some preference documentation updates
* Fail more gracefully when converting a non Unicode string to UTF-16
  (transient error with clear error message)


Modified: trunk/src/Makefile.OCaml
===================================================================
--- trunk/src/Makefile.OCaml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/Makefile.OCaml	2010-03-25 14:43:01 UTC (rev 423)
@@ -97,6 +97,7 @@
   # Win32 system
   EXEC_EXT=.exe
   OBJ_EXT=.obj
+  OUTPUT_SEL=/Fo
   CWD=.
 #  Fix suggested by Karl M, Jan 2009:
 #    "The new flexlink wrapper that OCaml 3.11 uses was gagging on the res
@@ -108,13 +109,14 @@
   COBJS+=system/system_win_stubs$(OBJ_EXT) lwt/lwt_unix_stubs$(OBJ_EXT)
   WINOBJS=system/system_win.cmo
   SYSTEM=win
-  CLIBS+=-cclib "-link win32rc/unison.res"
-  STATICLIBS+=-cclib "-link win32rc/unison.res"
+  CLIBS+=-cclib "-link win32rc/unison.res" shell32.lib
+  STATICLIBS+=-cclib "-link win32rc/unison.res" shell32.lib
   buildexecutable::
 	@echo Building for Windows
 else
   # Unix system, or Cygwin with GNU C compiler
   OBJ_EXT=.o
+  OUTPUT_SEL="-o "
   ifeq ($(OSARCH),win32gnuc)
     CWD=.
     EXEC_EXT=.exe
@@ -413,7 +415,7 @@
 
 %.o %.obj: %.c
 	@echo "$(OCAMLOPT): $< ---> $@"
-	$(CAMLC) $(CAMLFLAGS) -ccopt -o -ccopt $(CWD)/$@ -c $(CWD)/$<
+	$(CAMLC) $(CAMLFLAGS) -ccopt $(OUTPUT_SEL)$(CWD)/$@ -c $(CWD)/$<
 
 $(NAME)$(EXEC_EXT): $(CAMLOBJS) $(COBJS)
 	@echo Linking $@

Modified: trunk/src/RECENTNEWS
===================================================================
--- trunk/src/RECENTNEWS	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/RECENTNEWS	2010-03-25 14:43:01 UTC (rev 423)
@@ -1,5 +1,16 @@
 CHANGES FROM VERSION 2.40.1
 
+* Windows: more fixes for compilation with MSVC
+* Rsync: somewhat faster compressor
+* Changed "fat" option to not use chmod, to be on the safe side
+  (perms = 0 + dontchmod = true)
+* Some preference documentation updates
+* Fail more gracefully when converting a non Unicode string to UTF-16
+  (transient error with clear error message)
+
+-------------------------------
+CHANGES FROM VERSION 2.40.1
+
 * Windows: fix C includes for compilation with MSVC
 * Windows: implement somewhat the O_APPEND flag, so that appending
   lines to a profile (ignored files, for instance) works instead of

Modified: trunk/src/case.ml
===================================================================
--- trunk/src/case.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/case.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -30,9 +30,8 @@
      ^ "will treat all filenames as case sensitive.  Ordinarily, when the flag is "
      ^ "set to {\\tt default}, "
      ^ "filenames are automatically taken to be case-insensitive if "
-     ^ "either host is running Windows or OSX.  In rare circumstances it is  "
-     ^ "useful to set the flag manually (e.g. when running Unison on a  "
-     ^ "Unix system with a FAT [Windows] volume mounted).")
+     ^ "either host is running Windows or OSX.  In rare circumstances it may be  "
+     ^ "useful to set the flag manually.")
 
 (* Defining this variable as a preference ensures that it will be propagated
    to the other host during initialization *)

Modified: trunk/src/fileinfo.ml
===================================================================
--- trunk/src/fileinfo.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/fileinfo.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -26,9 +26,8 @@
       links will result in an error during update detection.  \
       Ordinarily, when the flag is set to {\\tt default}, symbolic \
       links are synchronized except when one of the hosts is running \
-      Windows.  In rare circumstances it is useful to set the flag \
-      manually (e.g. when running Unison on a Unix system with a FAT \
-      [Windows] volume mounted).")
+      Windows.  In rare circumstances it may be useful to set the flag \
+      manually.")
 
 let symlinksAllowed =
   Prefs.createBool "links-aux" true
@@ -155,8 +154,7 @@
       of inode numbers during fast update detection. \
       This switch should be used with care, as it \
       is less safe than the standard update detection method, but it \
-      can be useful for synchronizing VFAT filesystems (which do not \
-      support inode numbers) mounted on Unix systems.")
+      can be useful with filesystems which do not support inode numbers.")
 let _ = Prefs.alias ignoreInodeNumbers "pretendwin"
 
 let stamp info =

Modified: trunk/src/globals.ml
===================================================================
--- trunk/src/globals.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/globals.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -294,7 +294,8 @@
     ("When this is set to {\\tt true}, Unison will use appropriate options \
       to synchronize efficiently and without error a replica located on a \
       FAT filesystem on a non-Windows machine: \
-      only synchronize the write permission bit ({\\tt perms = 0o200}); \
+      do not synchronize permissions ({\\tt perms = 0}); \
+      never use chmod ({\tt dontchmod = true}); \
       treat filenames as case insensitive ({\\tt ignorecase = true}); \
       do not attempt to synchronize symbolic links ({\\tt links = false}); \
       ignore inode number changes when detecting updates \

Modified: trunk/src/mkProjectInfo.ml
===================================================================
--- trunk/src/mkProjectInfo.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/mkProjectInfo.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -101,3 +101,4 @@
 
 
 
+

Modified: trunk/src/props.ml
===================================================================
--- trunk/src/props.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/props.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -47,6 +47,7 @@
   val check : Fspath.t -> Path.local -> Unix.LargeFile.stats -> t -> unit
   val validatePrefs : unit -> unit
   val permMask : int Prefs.t
+  val dontChmod : bool Prefs.t
 end = struct
 
 (* We introduce a type, Perm.t, that holds a file's permissions along with   *)
@@ -84,7 +85,11 @@
      $0o1777$: all bits but the set-uid and set-gid bits are \
      synchronised (synchronizing theses latter bits can be a security \
      hazard).  If you want to synchronize all bits, you can set the \
-     value of this preference to $-1$."
+     value of this preference to $-1$.  If one of the replica is on \
+     a FAT [Windows] filesystem, you should consider using the \
+     {\tt fat} preference instead of this preference.  If you need \
+     Unison not to set permissions at all, set the value of this \
+     preference to $0$ and set the preference {\tt dontchmod} to {\tt true}."
 
 (* Os-specific local conventions on file permissions                         *)
 let (fileDefault, dirDefault, fileSafe, dirSafe) =
@@ -215,7 +220,18 @@
             Util.msg "Setting permissions for %s to %s (%s)\n"
               (Fspath.toDebugString abspath) (toString (fileperm2perm fp))
               (Printf.sprintf "%o/%o" fp mask));
-        Fs.chmod abspath fp)
+        try
+          Fs.chmod abspath fp
+        with Unix.Unix_error (Unix.EOPNOTSUPP, _, _) as e ->
+          try
+            Util.convertUnixErrorsToTransient "setting permissions"
+              (fun () -> raise e)
+          with Util.Transient msg ->
+            raise (Util.Transient
+                     (msg ^
+                      ". You can use preference \"fat\",\
+                       or else set preference \"perms\" to 0 and \
+                       preference \"dontchmod\" to true to avoid this error")))
 
 let get stats _ = (stats.Unix.LargeFile.st_perm, Prefs.read permMask)
 
@@ -766,6 +782,7 @@
 
 let syncModtimes = Time.sync
 let permMask = Perm.permMask
+let dontChmod = Perm.dontChmod
 
 let validatePrefs = Perm.validatePrefs
 

Modified: trunk/src/props.mli
===================================================================
--- trunk/src/props.mli	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/props.mli	2010-03-25 14:43:01 UTC (rev 423)
@@ -30,6 +30,7 @@
 
 val syncModtimes : bool Prefs.t
 val permMask : int Prefs.t
+val dontChmod : bool Prefs.t
 
 (* We are reusing the directory length to store a flag indicating that
    the directory is unchanged *)

Modified: trunk/src/system/system_win.ml
===================================================================
--- trunk/src/system/system_win.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/system/system_win.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -54,11 +54,26 @@
   else
     f
 
+let encodingError p =
+  raise
+    (Sys_error
+       (Format.sprintf "The file path '%s' is not encoded in Unicode." p))
+
 let utf8 = Unicode.from_utf_16
-let utf16 = Unicode.to_utf_16
+let utf16 s =
+  try
+    Unicode.to_utf_16 s
+  with Unicode.Invalid ->
+    raise (Sys_error
+             (Format.sprintf "The text '%' is not encoded in Unicode" s))
 let path8 = Unicode.from_utf_16(*_filename*)
-let path16 = Unicode.to_utf_16(*_filename*)
-let epath f = path16 (extendedPath f)
+let path16 f =
+  try Unicode.to_utf_16(*_filename*) f with Unicode.Invalid -> encodingError f
+let epath f =
+  try
+    Unicode.to_utf_16(*_filename*) (extendedPath f)
+  with
+    Unicode.Invalid -> encodingError f
 
 let sys_error e =
   match e with

Modified: trunk/src/transfer.ml
===================================================================
--- trunk/src/transfer.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/transfer.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -479,7 +479,7 @@
   let hash checksum = checksum
 
   (* Compute the hash table length as a function of the number of blocks *)
-  let hashTableLength signatures =
+  let computeHashTableLength signatures =
     2 * (upperPowerOfTwo signatures.blockCount 32)
 
   (* Hash the block signatures into the hash table *)
@@ -495,8 +495,28 @@
   (* Given a key, retrieve the corresponding entry in the table *)
   let findEntry hashTable hashTableLength checksum :
       (int * Checksum.t) list =
-    hashTable.((hash checksum) land (hashTableLength - 1))
+    Array.unsafe_get hashTable ((hash checksum) land (hashTableLength - 1))
 
+  let sigFilter hashTableLength signatures =
+    let len = hashTableLength lsl 2 in
+    let filter = String.make len '\000' in
+    for k = 0 to signatures.blockCount - 1 do
+      let cs = Int32.to_int signatures.weakChecksum.{k} land 0x7fffffff in
+      let h1 = cs lsr 28 in
+      assert (h1 >= 0 && h1 < 8);
+      let h2 = (cs lsr 5) land (len - 1) in
+      let mask = 1 lsl h1 in
+      filter.[h2] <- Char.chr (Char.code filter.[h2] lor mask)
+    done;
+    filter
+
+  let filterMem filter hashTableLength checksum =
+    let len = hashTableLength lsl 2 in
+    let h2 = (checksum lsr 5) land (len - 1) in
+    let h1 = checksum lsr 28 in
+    let mask = 1 lsl h1 in
+    Char.code (String.unsafe_get filter h2) land mask <> 0
+
   (* Log the values of the parameters associated with the hash table *)
   let logHash hashTable hashTableLength =
     let rec probe empty collision i =
@@ -522,19 +542,20 @@
   type probes = {
       mutable hitHit : int;
       mutable hitMiss : int;
+      mutable missMiss : int;
       mutable nbBlock : int;
       mutable nbString : int;
       mutable stringSize : int
     }
 
   let logMeasures pb =
-((*
     debugLog (fun() -> Util.msg
-        "hit-hit = %d, hit-miss = %d, hit rate = %d%%\n"
-        pb.hitHit pb.hitMiss
+        "hit-hit = %d, hit-miss = %d, miss-miss = %d, hit rate = %d%%\n"
+        pb.hitHit pb.hitMiss pb.missMiss
         (if pb.hitHit <> 0 then
            pb.hitHit * 100 / (pb.hitHit + pb.hitMiss)
-         else 0));
+         else 0))
+(*
     debugLog (fun() -> Util.msg
         "%d strings (%d bytes), %d blocks\n"
         pb.nbString pb.stringSize pb.nbBlock);
@@ -544,7 +565,7 @@
         generic);
     debug (fun() -> Util.msg
         "compression rate = %d%%\n" ((pb.stringSize * 100) / generic))
-*))
+*)
 
 
   (*** COMPRESSION ***)
@@ -553,6 +574,17 @@
   (* MUST be >= 2 * blockSize *)
   let minComprBufSize = 8192
 
+  type compressorState =
+    { (* Rolling checksum data *)
+      mutable checksum : int;
+      mutable cksumOutgoing : char;
+      (* Buffering *)
+      mutable offset : int;
+      mutable toBeSent : int;
+      mutable length : int;
+      (* Position in file *)
+      mutable absolutePos : Uutil.Filesize.t }
+
   (* Compress the file using the algorithm described in the header *)
   let rsyncCompress sigs infd srcLength showProgress transmit =
     debug (fun() -> Util.msg "compressing\n");
@@ -567,7 +599,8 @@
 
     (* Measures *)
     let pb =
-      { hitHit = 0; hitMiss = 0; nbBlock = 0; nbString = 0; stringSize = 0 } in
+      { hitHit = 0; hitMiss = 0; missMiss = 0;
+        nbBlock = 0; nbString = 0; stringSize = 0 } in
 (*
     let transmit tokenList =
       Safelist.iter
@@ -596,10 +629,12 @@
     let transmit token = queueToken tokenQueue showProgress transmit token in
 
     (* Set up the hash table for fast checksum look-up *)
-    let hashTableLength = ref (hashTableLength sigs) in
-    let blockTable = hashSig !hashTableLength sigs in
-    logHash blockTable !hashTableLength;
+    let hashTableLength = computeHashTableLength sigs in
+    let blockTable = hashSig hashTableLength sigs in
+    logHash blockTable hashTableLength;
 
+    let filter = sigFilter hashTableLength sigs in
+
     let rec fingerprintMatchRec checksums pos fp i =
       let i = i - 1 in
       i < 0 ||
@@ -623,107 +658,123 @@
     in
 
     (* Set up the rolling checksum data *)
-    let checksum = ref 0 in
-    let cksumOutgoing = ref ' ' in
-    let cksumTable = ref (Checksum.init blockSize) in
+    let cksumTable = Checksum.init blockSize in
 
-    let absolutePos = ref Uutil.Filesize.zero in
+    let initialState =
+      { checksum = 0; cksumOutgoing = ' ';
+        offset = comprBufSize; toBeSent = comprBufSize; length = comprBufSize;
+        absolutePos = Uutil.Filesize.zero }
+    in
 
     (* Check the new window position and update the compression buffer
        if its end has been reached *)
-    let rec slideWindow newOffset toBeSent length miss : unit Lwt.t =
-      if newOffset + blockSize <= length then
-        computeChecksum newOffset toBeSent length miss
-      else if length = comprBufSize then begin
-        transmitString toBeSent newOffset >>= (fun () ->
-        let chunkSize = length - newOffset in
+    let rec slideWindow st miss : unit Lwt.t =
+      if st.offset + blockSize <= st.length then
+        computeChecksum st miss
+      else if st.length = comprBufSize then begin
+        transmitString st.toBeSent st.offset >>= (fun () ->
+        let chunkSize = st.length - st.offset in
         if chunkSize > 0 then begin
           assert(comprBufSize >= blockSize);
-          String.blit comprBuf newOffset comprBuf 0 chunkSize
+          String.blit comprBuf st.offset comprBuf 0 chunkSize
         end;
-        let rem = Uutil.Filesize.sub srcLength !absolutePos in
+        let rem = Uutil.Filesize.sub srcLength st.absolutePos in
         let avail = comprBufSize - chunkSize in
         let l =
           reallyRead infd comprBuf chunkSize
             (if rem > comprBufSizeFS then avail else
              min (Uutil.Filesize.toInt rem) avail)
         in
-        absolutePos :=
-          Uutil.Filesize.add !absolutePos (Uutil.Filesize.ofInt l);
-        let length = chunkSize + l in
+        st.absolutePos <-
+          Uutil.Filesize.add st.absolutePos (Uutil.Filesize.ofInt l);
+        st.offset <- 0;
+        st.toBeSent <- 0;
+        st.length <- chunkSize + l;
         debugToken (fun() -> Util.msg "updating the compression buffer\n");
-        debugToken (fun() -> Util.msg "new length = %d bytes\n" length);
-        slideWindow 0 0 length miss)
+        debugToken (fun() -> Util.msg "new length = %d bytes\n" st.length);
+        slideWindow st miss)
       end else
-        transmitString toBeSent length >>= (fun () ->
+        transmitString st.toBeSent st.length >>= (fun () ->
         transmit EOF)
 
     (* Compute the window contents checksum, in a rolling fashion if there
        was a miss *)
-    and computeChecksum newOffset toBeSent length miss =
+    and computeChecksum st miss =
+      if miss then
+        rollChecksum st
+      else begin
+        let cksum = Checksum.substring comprBuf st.offset blockSize in
+        st.checksum <- cksum;
+        st.cksumOutgoing <- String.unsafe_get comprBuf st.offset;
+        processBlock st
+      end
+
+    and rollChecksum st =
+      let ingoingChar =
+        String.unsafe_get comprBuf (st.offset + blockSize - 1) in
       let cksum =
-        if miss then
-          Checksum.roll !cksumTable !checksum !cksumOutgoing
-            (String.unsafe_get comprBuf (newOffset + blockSize - 1))
-        else
-          Checksum.substring comprBuf newOffset blockSize
-      in
-      checksum := cksum;
-      cksumOutgoing := String.unsafe_get comprBuf newOffset;
-      processBlock newOffset toBeSent length cksum
+        Checksum.roll cksumTable st.checksum st.cksumOutgoing ingoingChar in
+      st.checksum <- cksum;
+      st.cksumOutgoing <- String.unsafe_get comprBuf st.offset;
+      if filterMem filter hashTableLength cksum then
+        processBlock st
+      else
+        miss st
 
     (* Try to match the current block with one existing in the old file *)
-    and processBlock offset toBeSent length checksum =
-      if Trace.enabled "transfer+" then
-        debugV (fun() -> Util.msg
-            "processBlock offset=%d toBeSent=%d length=%d blockSize = %d\n"
-            offset toBeSent length blockSize);
-      if Trace.enabled "rsynctoken" then assert
-         (0 <= toBeSent && toBeSent <= offset && offset + blockSize <= length);
-      match findEntry blockTable !hashTableLength checksum with
-      | [] -> miss offset toBeSent length
+    and processBlock st =
+      let checksum = st.checksum in
+      match findEntry blockTable hashTableLength checksum with
+      | [] ->
+          pb.missMiss <- pb.missMiss + 1;
+          miss st
       | entry ->
-          let blockNum = findBlock offset checksum entry None in
+          let blockNum = findBlock st checksum entry None in
           if blockNum = -1 then begin
               pb.hitMiss <- pb.hitMiss + 1;
-              miss offset toBeSent length
+              miss st
           end else begin
               pb.hitHit <- pb.hitHit + 1;
-              hit offset toBeSent length blockNum
+              hit st blockNum
           end
 
     (* In the hash table entry, find nodes with the right checksum and
        match fingerprints *)
-    and findBlock offset checksum entry fingerprint =
+    and findBlock st checksum entry fingerprint =
       match entry, fingerprint with
       | [], _ ->
           -1
       | (k, cs) :: tl, None
         when cs = checksum ->
-          let fingerprint = Digest.substring comprBuf offset blockSize in
-          findBlock offset checksum entry (Some fingerprint)
+          let fingerprint = Digest.substring comprBuf st.offset blockSize in
+          findBlock st checksum entry (Some fingerprint)
       | (k, cs) :: tl, Some fingerprint
         when cs = checksum && fingerprintMatch k fingerprint ->
           k
       | _ :: tl, _ ->
-          findBlock offset checksum tl fingerprint
+          findBlock st checksum tl fingerprint
 
     (* Miss : slide the window one character ahead *)
-    and miss offset toBeSent length =
-      slideWindow (offset + 1) toBeSent length true
+    and miss st =
+      st.offset <- st.offset + 1;
+      if st.offset + blockSize <= st.length then
+        rollChecksum st
+      else
+        slideWindow st true
 
     (* Hit : send the data waiting and a BLOCK token, then slide the window
        one block ahead *)
-    and hit offset toBeSent length blockNum =
-      transmitString toBeSent offset >>= (fun () ->
-      let sent = offset in
-      let toBeSent = sent + blockSize in
+    and hit st blockNum =
+      transmitString st.toBeSent st.offset >>= (fun () ->
+      let sent = st.offset in
+      st.toBeSent <- sent + blockSize;
       transmit (BLOCK blockNum) >>= (fun () ->
-      slideWindow (offset + blockSize) toBeSent length false))
+      st.offset <- st.offset + blockSize;
+      slideWindow st false))
     in
 
     (* Initialization and termination *)
-    slideWindow comprBufSize comprBufSize comprBufSize false >>= (fun () ->
+    slideWindow initialState false >>= (fun () ->
     flushTokenQueue () >>= (fun () ->
     logMeasures pb;
     Trace.showTimer timer;

Modified: trunk/src/uicommon.ml
===================================================================
--- trunk/src/uicommon.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/uicommon.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -458,7 +458,8 @@
   let someHostIsCaseInsensitive =
     someHostIsRunningWindows || someHostRunningOsX in
   if Prefs.read Globals.fatFilesystem then begin
-    Prefs.overrideDefault Props.permMask 0o200;
+    Prefs.overrideDefault Props.permMask 0;
+    Prefs.overrideDefault Props.dontChmod true;
     Prefs.overrideDefault Case.caseInsensitiveMode `True;
     Prefs.overrideDefault Fileinfo.allowSymlinks `False;
     Prefs.overrideDefault Fileinfo.ignoreInodeNumbers true

Modified: trunk/src/unicode.ml
===================================================================
--- trunk/src/unicode.ml	2010-03-23 15:07:12 UTC (rev 422)
+++ trunk/src/unicode.ml	2010-03-25 14:43:01 UTC (rev 423)
@@ -1733,7 +1733,7 @@
     let c3 = get s (i + 3) in
     (c1 lor c2 lor c3) land 0xc0 = 0x80 &&
     let v = c lsl 18 + c1 lsl 12 + c2 lsl 6 + c3 - 0x03c82080 in
-    v >= 0x10000 && v < 0x10ffff &&
+    v >= 0x10000 && v <= 0x10ffff &&
     scan s (i + 4) l
   end
 



More information about the Unison-hackers mailing list