From bcpierce at seas.upenn.edu Mon Jul 19 09:04:27 2010 From: bcpierce at seas.upenn.edu (bcpierce@seas.upenn.edu) Date: Mon, 19 Jul 2010 09:04:27 -0400 Subject: [Unison-hackers] [unison-svn] r456 - in trunk/src: . ubase Message-ID: <201007191304.o6JD4RkA018930@yaws.seas.upenn.edu> Author: bcpierce Date: 2010-07-19 09:04:26 -0400 (Mon, 19 Jul 2010) New Revision: 456 Modified: trunk/src/RECENTNEWS trunk/src/mkProjectInfo.ml trunk/src/ubase/myMap.mli trunk/src/uitext.ml trunk/src/update.ml trunk/src/update.mli Log: * Small patch from Stephane Glondu to make Unison compile with Ocaml 3.12. Modified: trunk/src/RECENTNEWS =================================================================== --- trunk/src/RECENTNEWS 2010-06-06 12:43:26 UTC (rev 455) +++ trunk/src/RECENTNEWS 2010-07-19 13:04:26 UTC (rev 456) @@ -1,3 +1,9 @@ +CHANGES FROM VERSION 2.42.-29 + +* Small patch from Stephane Glondu to make Unison compile with Ocaml 3.12. + + +------------------------------- CHANGES FROM VERSION 2.41.-27 * Try again to fix version number Modified: trunk/src/mkProjectInfo.ml =================================================================== --- trunk/src/mkProjectInfo.ml 2010-06-06 12:43:26 UTC (rev 455) +++ trunk/src/mkProjectInfo.ml 2010-07-19 13:04:26 UTC (rev 456) @@ -5,8 +5,8 @@ let projectName = "unison" let majorVersion = 2 -let minorVersion = 41 -let pointVersionOrigin = 452 (* Revision that corresponds to point version 0 *) +let minorVersion = 42 +let pointVersionOrigin = 454 (* Revision that corresponds to point version 0 *) (* Documentation: This is a program to construct a version of the form Major.Minor.Point, @@ -57,3 +57,4 @@ + Modified: trunk/src/ubase/myMap.mli =================================================================== --- trunk/src/ubase/myMap.mli 2010-06-06 12:43:26 UTC (rev 455) +++ trunk/src/ubase/myMap.mli 2010-07-19 13:04:26 UTC (rev 456) @@ -1,6 +1,6 @@ (* This file is taken from the Objective Caml standard library. 
-Some functions has been added to suite Unison needs. +Some functions have been added to suit Unison's needs. *) (***********************************************************************) (* *) Modified: trunk/src/uitext.ml =================================================================== --- trunk/src/uitext.ml 2010-06-06 12:43:26 UTC (rev 455) +++ trunk/src/uitext.ml 2010-07-19 13:04:26 UTC (rev 456) @@ -48,8 +48,7 @@ let cbreakMode = ref None -(* FIX: this may also work with Cygwin, but someone needs to try it... *) -let supportSignals = Util.osType = `Unix (*|| Util.isCygwin*) +let supportSignals = Util.osType = `Unix || Util.isCygwin let rawTerminal () = match !cbreakMode with Modified: trunk/src/update.ml =================================================================== --- trunk/src/update.ml 2010-06-06 12:43:26 UTC (rev 455) +++ trunk/src/update.ml 2010-07-19 13:04:26 UTC (rev 456) @@ -1089,7 +1089,9 @@ Unison will use the modification time and length of a file as a `pseudo inode number' \ when scanning replicas for updates, \ - instead of reading the full contents of every file. Under \ + instead of reading the full contents of every file. (This does not \ + apply to the very first run, when Unison will always scan \ + all files regarless of this switch). Under \ Windows, this may cause Unison to miss propagating an update \ if the modification time and length of the \ file are both unchanged by the update. However, Unison will never \ Modified: trunk/src/update.mli =================================================================== --- trunk/src/update.mli 2010-06-06 12:43:26 UTC (rev 455) +++ trunk/src/update.mli 2010-07-19 13:04:26 UTC (rev 456) @@ -1,7 +1,7 @@ (* Unison file synchronizer: src/update.mli *) (* Copyright 1999-2010, Benjamin C. 
Pierce (see COPYING for details) *) -module NameMap : Map.S with type key = Name.t +module NameMap : MyMap.S with type key = Name.t type archive = ArchiveDir of Props.t * archive NameMap.t From bcpierce at seas.upenn.edu Mon Jul 19 09:05:04 2010 From: bcpierce at seas.upenn.edu (bcpierce@seas.upenn.edu) Date: Mon, 19 Jul 2010 09:05:04 -0400 Subject: [Unison-hackers] [unison-svn] r457 - branches/2.40/src Message-ID: <201007191305.o6JD555R018949@yaws.seas.upenn.edu> Author: bcpierce Date: 2010-07-19 09:05:04 -0400 (Mon, 19 Jul 2010) New Revision: 457 Modified: branches/2.40/src/RECENTNEWS branches/2.40/src/mkProjectInfo.ml branches/2.40/src/update.mli Log: * Small patch from Stephane Glondu to make Unison compile with Ocaml 3.12. Modified: branches/2.40/src/RECENTNEWS =================================================================== --- branches/2.40/src/RECENTNEWS 2010-07-19 13:04:26 UTC (rev 456) +++ branches/2.40/src/RECENTNEWS 2010-07-19 13:05:04 UTC (rev 457) @@ -1,5 +1,10 @@ CHANGES FROM VERSION 2.40.16 +* Small patch from Stephane Glondu to make Unison compile with Ocaml 3.12. + +------------------------------- +CHANGES FROM VERSION 2.40.16 + * One more fix to Unicode case sensitive mode ------------------------------- Modified: branches/2.40/src/mkProjectInfo.ml =================================================================== --- branches/2.40/src/mkProjectInfo.ml 2010-07-19 13:04:26 UTC (rev 456) +++ branches/2.40/src/mkProjectInfo.ml 2010-07-19 13:05:04 UTC (rev 457) @@ -117,3 +117,4 @@ + Modified: branches/2.40/src/update.mli =================================================================== --- branches/2.40/src/update.mli 2010-07-19 13:04:26 UTC (rev 456) +++ branches/2.40/src/update.mli 2010-07-19 13:05:04 UTC (rev 457) @@ -1,7 +1,7 @@ (* Unison file synchronizer: src/update.mli *) (* Copyright 1999-2009, Benjamin C. 
Pierce (see COPYING for details) *) -module NameMap : Map.S with type key = Name.t +module NameMap : MyMap.S with type key = Name.t type archive = ArchiveDir of Props.t * archive NameMap.t From jay.levitt at akamai.com Mon Jul 19 11:16:28 2010 From: jay.levitt at akamai.com (Jay Levitt) Date: Mon, 19 Jul 2010 11:16:28 -0400 Subject: [Unison-hackers] fsmonitor.py & ignore paths on Linux Message-ID: <4C446C4C.6040909@akamai.com> I'm trying out the most excellent -repeat watch functionality in SVN, syncing between a Mac and a Linux machine; the directory has about 260,000 files in it, but I'm ignoring all but 5,000. The Mac detects changes instantly, but the Linux side has a few problems: - At startup, it takes about five minutes of 100% CPU crunching to populate the watcher paths - and this is five minutes on a machine with a hybrid-SSD drive! It then takes another 10-15 minutes of seeming idle before it starts working normally. It looks like it's adding every single file as its own inotify_watch. If I understand inotify correctly, you don't need to do that - all you need to do is watch each directory in the tree, and any contained files will also be notify. I'm guessing that doing a stat on each file (to see if it's a directory) would be more efficient than adding each watcher. - That's made significantly worse by the fact that it doesn't honor any ignore paths in the unison config; this probably would be a minor annoyance if it only had to iterate over 5K of files instead of 250K. - Now that I think about it, fsmonitor.py on Linux (server) is assuming that its ~/.unison directory contains the same profile as the Mac (client). In my case, it doesn't, only because I hadn't thought to copy it over. - But even worse: unison obeys fsmonitor's instruction and syncs up changed files - even if they were previously ignored. So, f'rinstance, the server's .unison archive files are getting synced to the client. Do you need any further info to reproduce these? 
I know the linux code is especially untested. I'd offer a patch, but I don't know Python, so that'd be more threat than offer. Jay Levitt From bcpierce at cis.upenn.edu Mon Jul 19 11:34:46 2010 From: bcpierce at cis.upenn.edu (Benjamin Pierce) Date: Mon, 19 Jul 2010 11:34:46 -0400 Subject: [Unison-hackers] fsmonitor.py & ignore paths on Linux In-Reply-To: <4C446C4C.6040909@akamai.com> References: <4C446C4C.6040909@akamai.com> Message-ID: Hi Jay, Thanks for the prod -- the last push on this stuff got fairly close to a working state, but (as you found out) there are still plenty of rough edges. I'd love to get it really working. Unfortunately, I don't have a linux server with a python installation including pyinotify, so I can't test that side myself. > - That's made significantly worse by the fact that it doesn't honor any > ignore paths in the unison config; this probably would be a minor > annoyance if it only had to iterate over 5K of files instead of 250K. > > - Now that I think about it, fsmonitor.py on Linux (server) is assuming > that its ~/.unison directory contains the same profile as the Mac > (client). In my case, it doesn't, only because I hadn't thought to copy > it over. Actually, this part of fsmonitor.py should be simplified: Unison can give it the ignore preferences on the command line (or in a file, if it gets too big) so it doesn't have to parse the preferences file itself. But we should certainly make it operate on a dir-by-dir basis in any case. > - But even worse: unison obeys fsmonitor's instruction and syncs up > changed files - even if they were previously ignored. So, f'rinstance, > the server's .unison archive files are getting synced to the client. That sounds like something isn't working on the unison side -- it's supposed to check for ignored files. > Do you need any further info to reproduce these? I know the linux code > is especially untested. I'd offer a patch, but I don't know Python, so > that'd be more threat than offer. 
(Python is pretty readable, by the way, and if you're familiar with the underlying OS stuff you can probably figure out how to at least add print statements enough to diagnose problems.) - Benjamin From Jerome.Vouillon at pps.jussieu.fr Tue Jul 20 04:08:53 2010 From: Jerome.Vouillon at pps.jussieu.fr (Jerome Vouillon) Date: Tue, 20 Jul 2010 10:08:53 +0200 Subject: [Unison-hackers] fsmonitor.py & ignore paths on Linux In-Reply-To: References: <4C446C4C.6040909@akamai.com> Message-ID: <20100720080853.GA761@pps.jussieu.fr> On Mon, Jul 19, 2010 at 11:34:46AM -0400, Benjamin Pierce wrote: > > - But even worse: unison obeys fsmonitor's instruction and syncs up > > changed files - even if they were previously ignored. So, f'rinstance, > > the server's .unison archive files are getting synced to the client. > > That sounds like something isn't working on the unison side -- it's supposed to check for ignored files. With the directive "ignore = Name .svn", Unison will indeed skip a directory "a/.svn" and then all files below. But, if explicitly given a path "a/.svn/b", it will synchronize this path. I believe this is the correct behavior. As a workaround, one can use a regular expression to explicitly ignore a directory and all its subdirectories: ignore = Regex .svn(/.*)? Unison should validate the output of fsmonitor.py using both the list of synchronized paths and the ignore directives, removing paths that do not correspond to a file normally synchronized. -- Jerome From bcpierce at cis.upenn.edu Tue Jul 20 06:37:57 2010 From: bcpierce at cis.upenn.edu (Benjamin C. 
Pierce) Date: Tue, 20 Jul 2010 06:37:57 -0400 Subject: [Unison-hackers] fsmonitor.py & ignore paths on Linux In-Reply-To: <20100720080853.GA761@pps.jussieu.fr> References: <4C446C4C.6040909@akamai.com> <20100720080853.GA761@pps.jussieu.fr> Message-ID: <28E26C50-4C22-4507-8898-F680345612A7@cis.upenn.edu> On Jul 20, 2010, at 4:08 AM, Jerome Vouillon wrote: > Unison should validate the output of fsmonitor.py using both the list > of synchronized paths and the ignore directives, removing paths that > do not corresponds to a file normally synchronized. It should be doing (some version of) this already: let shouldNotIgnore p = let rec test prefix rest = if Globals.shouldIgnore prefix then false else match (Path.deconstruct rest) with None -> true | Some(n,rest') -> test (Path.child prefix n) rest' in test Path.empty (Path.fromString p) - B From jay.levitt at akamai.com Wed Jul 21 11:24:11 2010 From: jay.levitt at akamai.com (Jay Levitt) Date: Wed, 21 Jul 2010 11:24:11 -0400 Subject: [Unison-hackers] fsmonitor.py & ignore paths on Linux In-Reply-To: <28E26C50-4C22-4507-8898-F680345612A7@cis.upenn.edu> References: <4C446C4C.6040909@akamai.com> <20100720080853.GA761@pps.jussieu.fr> <28E26C50-4C22-4507-8898-F680345612A7@cis.upenn.edu> Message-ID: <4C47111B.7090306@akamai.com> Benjamin C. Pierce wrote: > On Jul 20, 2010, at 4:08 AM, Jerome Vouillon wrote: > >> Unison should validate the output of fsmonitor.py using both the list >> of synchronized paths and the ignore directives, removing paths that >> do not corresponds to a file normally synchronized. >> > > It should be doing (some version of) this already: > Oh, nevermind. I guess unison -server gets its ignore list from the server side, not from the client's ignore list. That's not quite intuitive, since I (myself) am running it only on the client. Maybe doing -repeat watch should require syncing of .unison/this.prf, or at least warn that it differs? 
-------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.seas.upenn.edu/pipermail/unison-hackers/attachments/20100721/8640dbd0/attachment.htm From bcpierce at cis.upenn.edu Wed Jul 21 11:38:41 2010 From: bcpierce at cis.upenn.edu (Benjamin C. Pierce) Date: Wed, 21 Jul 2010 11:38:41 -0400 Subject: [Unison-hackers] fsmonitor.py & ignore paths on Linux In-Reply-To: <4C47111B.7090306@akamai.com> References: <4C446C4C.6040909@akamai.com> <20100720080853.GA761@pps.jussieu.fr> <28E26C50-4C22-4507-8898-F680345612A7@cis.upenn.edu> <4C47111B.7090306@akamai.com> Message-ID: > Oh, nevermind. I guess unison -server gets its ignore list from the server side, not from the client's ignore list. That's not quite intuitive, since I (myself) am running it only on the client. Maybe doing -repeat watch should require syncing of .unison/this.prf, or at least warn that it differs? No, it gets the ignore patterns from the client's profile. The profile is not read on the server side. - Benjamin From bcpierce at seas.upenn.edu Wed Jul 21 21:42:07 2010 From: bcpierce at seas.upenn.edu (bcpierce@seas.upenn.edu) Date: Wed, 21 Jul 2010 21:42:07 -0400 Subject: [Unison-hackers] [unison-svn] r458 - in trunk/src: . 
ubase Message-ID: <201007220142.o6M1g7T7021084@yaws.seas.upenn.edu> Author: bcpierce Date: 2010-07-21 21:42:07 -0400 (Wed, 21 Jul 2010) New Revision: 458 Modified: trunk/src/RECENTNEWS trunk/src/fingerprint.ml trunk/src/fingerprint.mli trunk/src/fpcache.ml trunk/src/mkProjectInfo.ml trunk/src/os.ml trunk/src/os.mli trunk/src/ubase/util.mli trunk/src/update.ml Log: * Added support for "pseudo-fingerprints", as a first step to implementing super-fast initial scans, following recent discussion on the unison-hackers list Modified: trunk/src/RECENTNEWS =================================================================== --- trunk/src/RECENTNEWS 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/RECENTNEWS 2010-07-22 01:42:07 UTC (rev 458) @@ -1,3 +1,10 @@ +CHANGES FROM VERSION 2.43.-29 + +* Added support for "pseudo-fingerprints", as a first step to + implementing super-fast initial scans, following recent discussion + on the unison-hackers list + +------------------------------- CHANGES FROM VERSION 2.42.-29 * Small patch from Stephane Glondu to make Unison compile with Ocaml 3.12. Modified: trunk/src/fingerprint.ml =================================================================== --- trunk/src/fingerprint.ml 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/fingerprint.ml 2010-07-22 01:42:07 UTC (rev 458) @@ -15,7 +15,6 @@ along with this program. If not, see . 
*) - (* NOTE: IF YOU CHANGE TYPE "FINGERPRINT", THE ARCHIVE FORMAT CHANGES; *) (* INCREMENT "UPDATE.ARCHIVEFORMAT" *) type t = string @@ -92,3 +91,9 @@ end let equal (d : string) d' = d = d' + +let pseudo_prefix = "LEN" + +let pseudo len = pseudo_prefix ^ (Uutil.Filesize.toString len) + +let ispseudo f = Util.startswith f pseudo_prefix Modified: trunk/src/fingerprint.mli =================================================================== --- trunk/src/fingerprint.mli 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/fingerprint.mli 2010-07-22 01:42:07 UTC (rev 458) @@ -17,3 +17,9 @@ val hash : t -> int val equal : t -> t -> bool + +(* A pseudo-fingerprint has the same type as a real one (so it can + be stored in the archive, etc.), but it is computed just from the + size of the file, ignoring the contents *) +val pseudo : Uutil.Filesize.t -> t +val ispseudo : t -> bool Modified: trunk/src/fpcache.ml =================================================================== --- trunk/src/fpcache.ml 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/fpcache.ml 2010-07-22 01:42:07 UTC (rev 458) @@ -239,7 +239,7 @@ let res = try let (oldDesc, oldDig, oldStamp, oldRess) as res = - PathTbl.find tbl (Path.toString path) in + PathTbl.find tbl (Path.toString path) in if not (clearlyUnchanged fastCheck path info oldDesc oldStamp oldRess) then Modified: trunk/src/mkProjectInfo.ml =================================================================== --- trunk/src/mkProjectInfo.ml 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/mkProjectInfo.ml 2010-07-22 01:42:07 UTC (rev 458) @@ -5,7 +5,7 @@ let projectName = "unison" let majorVersion = 2 -let minorVersion = 42 +let minorVersion = 43 let pointVersionOrigin = 454 (* Revision that corresponds to point version 0 *) (* Documentation: @@ -58,3 +58,4 @@ + Modified: trunk/src/os.ml =================================================================== --- trunk/src/os.ml 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/os.ml 2010-07-22 01:42:07 UTC 
(rev 458) @@ -274,6 +274,13 @@ let fullfingerprintEqual (fp, rfp) (fp', rfp') = Fingerprint.equal fp fp' && Fingerprint.equal rfp rfp' +let pseudoFingerprint size = + (Fingerprint.pseudo size, Fingerprint.dummy) + +let isPseudoFingerprint (fp,rfp) = + Fingerprint.ispseudo fp + + (*****************************************************************************) (* UNISON DIRECTORY *) (*****************************************************************************) Modified: trunk/src/os.mli =================================================================== --- trunk/src/os.mli 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/os.mli 2010-07-22 01:42:07 UTC (rev 458) @@ -45,3 +45,11 @@ Fspath.t -> Path.local -> (* coordinates of file to fingerprint *) Fileinfo.t -> (* old fileinfo *) fullfingerprint (* current fingerprint *) + +val pseudoFingerprint : + Uutil.Filesize.t -> (* size of file to "fingerprint" *) + fullfingerprint (* pseudo-fingerprint of this file (containing just + the file's length) *) + +val isPseudoFingerprint : + fullfingerprint -> bool Modified: trunk/src/ubase/util.mli =================================================================== --- trunk/src/ubase/util.mli 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/ubase/util.mli 2010-07-22 01:42:07 UTC (rev 458) @@ -47,7 +47,7 @@ (* String manipulation *) val truncateString : string -> int -> string -val startswith : string -> string -> bool +val startswith : string -> string -> bool (* STR,PREFIX *) val endswith : string -> string -> bool val findsubstring : string -> string -> int option val replacesubstring : string -> string -> string -> string (* IN,FROM,TO *) Modified: trunk/src/update.ml =================================================================== --- trunk/src/update.ml 2010-07-19 13:05:04 UTC (rev 457) +++ trunk/src/update.ml 2010-07-22 01:42:07 UTC (rev 458) @@ -15,7 +15,6 @@ along with this program. If not, see . 
*) - open Common let (>>=) = Lwt.(>>=) From bcpierce at cis.upenn.edu Wed Jul 21 21:48:09 2010 From: bcpierce at cis.upenn.edu (Benjamin Pierce) Date: Wed, 21 Jul 2010 21:48:09 -0400 Subject: [Unison-hackers] An idea for faster initial syncs In-Reply-To: <20100628094120.GD9740@pps.jussieu.fr> References: <20100628094120.GD9740@pps.jussieu.fr> Message-ID: <9D96B7E9-9196-4A39-B8C5-37577D59C236@cis.upenn.edu> Hi Jerome, When I started working on implementing this idea, I ran into a couple of questions: * If we have this, do we need the fingerprint cache any more? Can we justify the complexity of keeping both schemes? * It's tempting to make this a default behavior -- i.e., to *always* use a dummy fingerprint whenever we encounter a new file. The cost of this is that, if the file happens to get touched, it will get re-fingerprinted and show up as having been changed even if it has not really. With a little work, maybe this could be made invisible to the user most of the time; we'd have to notice when we had a recon item involving a "recently dummy" file (i.e., where one side is a dummy and the other is not) and re-fingerprint the non-dummy side. But I wonder if the cost (in terms of making the user's model of the system's default behavior more complex) is worth it. Thoughts? (Especially about the first...) - B On Jun 28, 2010, at 5:41 AM, Jerome Vouillon wrote: > On Thu, Jun 17, 2010 at 10:31:59AM -0400, Benjamin C. Pierce wrote: >> My idea is to add a switch that says to Unison "I know that the >> replicas are in sync already and I want to you rebuild your archives >> as fast as possible." When this switch is set, Unison would skip >> fingerprinting the contents of new files -- it would simply store a >> dummy fingerprint (a hash of the file's size and permissions) in the >> archive for each file. As long as files are never changed after >> this, this dummy fingerprint would never be looked at, so Unison's >> behavior would remain the same. 
If a file is changed at some point >> in the future, Unison will fingerprint the new contents, detect a >> change, and copy the new version to the other side, again behaving >> as it should. The one slight difference in behavior will be that if >> a file is really changed on one side but only touched on the other, >> Unison will detect a conflict rather than propagating the change. > > That's an interesting idea, indeed. It can also improve the user > experience when one of the replicas is initially empty: Unison will > start propagating files right away instead of spending a lot of time > scanning files. > > There should be a way to make Unison replace in the archives the dummy > fingerprints by the actual fingerprints. This requires a bit of work, > as we have to make sure that the archives are updated simultaneously. > But that could be implemented later. > >> Second, more seriously, if there is some file with different sizes >> (or that exists on one replica and not the other), Unison will >> calculate a dummy fingerprint during update detection and then later >> think that the file hasn't been transferred correctly because the >> fingerprints don't match. We may need a special case in the >> fingerprint check at the end that recalculates the fingerprint if it >> is a dummy. > > That's the most delicate part, indeed. > - when the archive contains a dummy fingerprint, we should not scan > the file contents to decide whether the file has been changed, > whether fastcheck is set to true or false, so that > Update.checkNoUpdates works properly; > - Copy.paranoidCheck should just return the computed checksum in case > of mismatch. Then, the appropriate action can be taken in > Copy.checkContentsChangeLocal, where we will have a possibly dummy > fingerprint from update detection and accurate fingerprints of the > source file and temporary destination file. > > As we have computed the actual fingerprint during file transfer, we > should put it in the archive. 
But that could be implemented in a > second step. > > -- Jerome > _______________________________________________ > Unison-hackers mailing list > Unison-hackers at lists.seas.upenn.edu > http://lists.seas.upenn.edu/mailman/listinfo/unison-hackers From hans_meine at gmx.net Thu Jul 22 06:56:23 2010 From: hans_meine at gmx.net (Hans Meine) Date: Thu, 22 Jul 2010 12:56:23 +0200 Subject: [Unison-hackers] An idea for faster initial syncs Message-ID: <201007221256.25728.hans_meine@gmx.net> On Thursday 22 July 2010 03:48:09 Benjamin Pierce wrote: > * It's tempting to make this a default behavior -- i.e., to *always* use a > dummy fingerprint whenever we encounter a new file. Yes, that would also be more consistent. > The cost of this is > that, if the file happens to get touched, it will get re-fingerprinted and > show up as having been changed even if it has not really. That would be ugly. I like to check all synchronized changes before confirming, and I hate getting "bogus" changes. (For instance, I recently started using OS X, and I got sooo many changes on the mac side, leading to an awful lot of conflicts, until I disabled the metadata syncing. At least, that seemed to be the cause.) I think waiting is less costly for the user than thinking about why some files changed and whether they should be sync'ed. > With a little > work, maybe this could be made invisible to the user most of the time; > we'd have to notice when we had a recon item involving a "recently dummy" > file (i.e., where one side is a dummy and the other is not) and > re-fingerprint the non-dummy side. That could work. Another idea would be to automatically fingerprint files with a dummy hash in the background in "spare"/idle time, i.e. while waiting for user input. IMHO that would get us the best of both worlds: quick initial responses from a user's POV, and proper fingerprints ASAP. 
The only downsides here are the increased implementation complexity and that the user should ideally be informed about the progress somehow (otherwise he/she might wonder why the HDD is working so much, or - even if the cause is known - when it will stop). Have a nice day, Hans From bcpierce at cis.upenn.edu Fri Jul 23 14:22:17 2010 From: bcpierce at cis.upenn.edu (Benjamin Pierce) Date: Fri, 23 Jul 2010 14:22:17 -0400 Subject: [Unison-hackers] An idea for faster initial syncs In-Reply-To: <201007221256.25728.hans_meine@gmx.net> References: <201007221256.25728.hans_meine@gmx.net> Message-ID: <1AA04C1F-AC0D-4498-B5CB-7BB1542AD804@cis.upenn.edu> >> * It's tempting to make this a default behavior -- i.e., to *always* use a >> dummy fingerprint whenever we encounter a new file. > > Yes, that would also be more consistent. On reflection, I think it's a bad idea to enable this by default -- too much potential for bad behavior if someone happens to create files on the two replicas with the same length at the same time. > Another idea would be to automatically fingerprint files with a dummy hash in > the background in "spare"/idle time, i.e. while waiting for user input. IMHO > that would get us the best of both worlds: quick initial responses from a > user's POV, and proper fingerprints ASAP. That's a possibility, but I wonder whether people would really want it -- computing all those fingerprints for files that are never looked at can take a LONG time (many many hours) on a big replica. - Benjamin From bcpierce at seas.upenn.edu Sat Jul 24 08:19:38 2010 From: bcpierce at seas.upenn.edu (bcpierce@seas.upenn.edu) Date: Sat, 24 Jul 2010 08:19:38 -0400 Subject: [Unison-hackers] [unison-svn] r459 - in trunk/src: . 
ubase Message-ID: <201007241219.o6OCJc0S018357@yaws.seas.upenn.edu> Author: bcpierce Date: 2010-07-24 08:19:38 -0400 (Sat, 24 Jul 2010) New Revision: 459 Modified: trunk/src/RECENTNEWS trunk/src/copy.ml trunk/src/fileinfo.mli trunk/src/fingerprint.ml trunk/src/fingerprint.mli trunk/src/fpcache.ml trunk/src/fpcache.mli trunk/src/mkProjectInfo.ml trunk/src/os.ml trunk/src/os.mli trunk/src/stasher.ml trunk/src/test.ml trunk/src/ubase/trace.ml trunk/src/update.ml trunk/src/xferhint.mli Log: * Experimental implementation of a new "faster check" mode for update detection. When this mode is enabled (by running with 'fastercheckUNSAFE=true'), Unison will skip calculating fingerprints of the contents of files that it has not seen before -- it just uses the file's size as a pseudo-fingerprint, allowing the archives to be built very quickly. This feature has not been extensively tested -- if you use it on live replicas, please pay careful attention to what Unison is doing. Also, note that the cost of faster update detection is that it is possible Unison will miss a conflict; this flag should be used only when the replicas are known to be identical. Here's the full documentation. let fastercheckUNSAFE = Prefs.createBool "fastercheckUNSAFE" false "!skip computing fingerprints for new files (experts only!)" ( "THIS FEATURE IS STILL EXPERIMENTAL AND SHOULD BE USED WITH EXTREME CAUTION. " ^ "\n\n" ^ "When this flag is set to {\\tt true}, Unison will compute a 'pseudo-" ^ "fingerprint' the first time it sees a file (either because the file is " ^ "new or because Unison is running for the first time). This enormously " ^ "speeds update detection, but it must be used with care, as it can cause " ^ "Unison to miss conflicts: If " ^ "a given path in the filesystem contains files on {\\em both} sides that " ^ "Unison has not yet seen, and if those files have the same length but different " ^ "contents, then Unison will not notice the presence of a conflict. 
If, later, one " ^ "of the files is changed, the changed file will be propagated, overwriting " ^ "the other. " ^ "\n\n" ^ "Moreover, even when the files are initially identical, setting this flag can lead " ^ "to potentially confusing behavior: " ^ "if a newly created file is later touched without being modified, Unison will " ^ "treat this " ^ "conservatively as a potential change (since it has no record of the earlier " ^ "contents) and show it as needing to be propagated to the other replica. " ^ "\n\n" ^ "Most users should leave this flag off -- the small time savings of not " ^ "fingerprinting new files is not worth the cost in terms of safety. However, " ^ "it can be very useful for power users with huge replicas that are known to " ^ "be already synchronized (e.g., because one replica is a newly created duplicate " ^ "of the other, or because they have previously been synchronized with Unison but " ^ "Unison's archives need to be rebuilt). In such situations, it is recommended " ^ "that this flag be set only for the initial run of Unison, so that new archives " ^ "can be created quickly, and then turned off for normal use.") Modified: trunk/src/RECENTNEWS =================================================================== --- trunk/src/RECENTNEWS 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/RECENTNEWS 2010-07-24 12:19:38 UTC (rev 459) @@ -1,3 +1,53 @@ +CHANGES FROM VERSION 2.43.-30 + +* Experimental implementation of a new "faster check" mode for update + detection. When this mode is enabled (by running with + 'fastercheckUNSAFE=true'), Unison will skip calculating fingerprints + of the contents of files that it has not seen before -- it just uses + the file's size as a pseudo-fingerprint, allowing the archives to be + built very quickly. + + This feature has not been extensively tested -- if you use it on + live replicas, please pay careful attention to what Unison is doing. 
+ Also, note that the cost of faster update detection is that it is + possible Unison will miss a conflict; this flag should be used only + when the replicas are known to be identical. + + Here's the full documentation. + +let fastercheckUNSAFE = + Prefs.createBool "fastercheckUNSAFE" + false "!skip computing fingerprints for new files (experts only!)" + ( "THIS FEATURE IS STILL EXPERIMENTAL AND SHOULD BE USED WITH EXTREME CAUTION. " + ^ "\n\n" + ^ "When this flag is set to {\\tt true}, Unison will compute a 'pseudo-" + ^ "fingerprint' the first time it sees a file (either because the file is " + ^ "new or because Unison is running for the first time). This enormously " + ^ "speeds update detection, but it must be used with care, as it can cause " + ^ "Unison to miss conflicts: If " + ^ "a given path in the filesystem contains files on {\\em both} sides that " + ^ "Unison has not yet seen, and if those files have the same length but different " + ^ "contents, then Unison will not notice the presence of a conflict. If, later, one " + ^ "of the files is changed, the changed file will be propagated, overwriting " + ^ "the other. " + ^ "\n\n" + ^ "Moreover, even when the files are initially identical, setting this flag can lead " + ^ "to potentially confusing behavior: " + ^ "if a newly created file is later touched without being modified, Unison will " + ^ "treat this " + ^ "conservatively as a potential change (since it has no record of the earlier " + ^ "contents) and show it as needing to be propagated to the other replica. " + ^ "\n\n" + ^ "Most users should leave this flag off -- the small time savings of not " + ^ "fingerprinting new files is not worth the cost in terms of safety. 
However, " + ^ "it can be very useful for power users with huge replicas that are known to " + ^ "be already synchronized (e.g., because one replica is a newly created duplicate " + ^ "of the other, or because they have previously been synchronized with Unison but " + ^ "Unison's archives need to be rebuilt). In such situations, it is recommended " + ^ "that this flag be set only for the initial run of Unison, so that new archives " + ^ "can be created quickly, and then turned off for normal use.") + +------------------------------- CHANGES FROM VERSION 2.43.-29 * Added support for "pseudo-fingerprints", as a first step to Modified: trunk/src/copy.ml =================================================================== --- trunk/src/copy.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/copy.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -67,7 +67,7 @@ None dataClearlyUnchanged in if dataClearlyUnchanged && ressClearlyUnchanged then begin - if paranoid then begin + if paranoid && not (Os.isPseudoFingerprint archDig) then begin let newDig = Os.fingerprint fspathFrom pathFrom info in if archDig <> newDig then begin Update.markPossiblyUpdated fspathFrom pathFrom; @@ -164,7 +164,7 @@ let paranoidCheck fspathTo pathTo realPathTo desc fp ress = let info = Fileinfo.get false fspathTo pathTo in let fp' = Os.fingerprint fspathTo pathTo info in - if fp' <> fp then begin + if fp' <> fp (* && not (Os.isPseudoFingerprint fp) *) then begin Lwt.return (Failure (Os.reasonForFingerprintMismatch fp fp')) end else Lwt.return (Success info) @@ -922,7 +922,7 @@ localFile fspathFrom pathFrom fspathTo pathTo realPathTo update desc (Osx.ressLength ress) (Some id); - paranoidCheck fspathTo pathTo realPathTo desc fp ress + paranoidCheck fspathTo pathTo realPathTo desc fp ress | _ -> transferFile rootFrom pathFrom rootTo fspathTo pathTo realPathTo Modified: trunk/src/fileinfo.mli =================================================================== --- trunk/src/fileinfo.mli 2010-07-22 01:42:07 UTC 
(rev 458) +++ trunk/src/fileinfo.mli 2010-07-24 12:19:38 UTC (rev 459) @@ -6,7 +6,7 @@ type t = { typ : typ; inode : int; desc : Props.t; osX : Osx.info} -val get : bool -> Fspath.t -> Path.local -> t +val get : bool (* fromRoot *) -> Fspath.t -> Path.local -> t val set : Fspath.t -> Path.local -> [`Set of Props.t | `Copy of Path.local | `Update of Props.t] -> Props.t -> unit Modified: trunk/src/fingerprint.ml =================================================================== --- trunk/src/fingerprint.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/fingerprint.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -19,6 +19,13 @@ (* INCREMENT "UPDATE.ARCHIVEFORMAT" *) type t = string +let pseudo_prefix = "LEN" + +let pseudo path len = pseudo_prefix ^ (Uutil.Filesize.toString len) ^ "@" ^ + (Path.toString path) + +let ispseudo f = Util.startswith f pseudo_prefix + (* Assumes that (fspath, path) is a file and gives its ``digest '', that is *) (* a short string of cryptographic quality representing it. 
*) let file fspath path = @@ -66,14 +73,16 @@ (int2hexa first, int2hexa second) let toString md5 = - let length = String.length md5 in - let string = String.create (length * 2) in - for i=0 to (length - 1) do - let c1, c2 = hexaCode (md5.[i]) in - string.[2*i] <- c1; - string.[2*i + 1] <- c2; - done; - string + if ispseudo md5 then md5 else begin + let length = String.length md5 in + let string = String.create (length * 2) in + for i=0 to (length - 1) do + let c1, c2 = hexaCode (md5.[i]) in + string.[2*i] <- c1; + string.[2*i + 1] <- c2; + done; + string + end let string = Digest.string @@ -92,8 +101,3 @@ let equal (d : string) d' = d = d' -let pseudo_prefix = "LEN" - -let pseudo len = pseudo_prefix ^ (Uutil.Filesize.toString len) - -let ispseudo f = Util.startswith f pseudo_prefix Modified: trunk/src/fingerprint.mli =================================================================== --- trunk/src/fingerprint.mli 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/fingerprint.mli 2010-07-24 12:19:38 UTC (rev 459) @@ -21,5 +21,5 @@ (* A pseudo-fingerprint has the same type as a real one (so it can be stored in the archive, etc.), but it is computed just from the size of the file, ignoring the contents *) -val pseudo : Uutil.Filesize.t -> t +val pseudo : Path.local -> Uutil.Filesize.t -> t val ispseudo : t -> bool Modified: trunk/src/fpcache.ml =================================================================== --- trunk/src/fpcache.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/fpcache.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -128,9 +128,9 @@ let magic = "Unison fingerprint cache format 2" -let init fastCheck fspath = +let init fastCheck ignorearchives fspath = finish (); - if fastCheck then begin + if fastCheck && not ignorearchives then begin begin try debug (fun () -> Util.msg "opening cache file %s for input\n" (System.fspathToDebugString fspath)); @@ -235,7 +235,7 @@ in du && ressClearlyUnchanged fastCheck newInfo oldRess du -let fingerprint fastCheck currfspath 
path info optDig = +let fingerprint ?(newfile=false) fastCheck currfspath path info optDig = let res = try let (oldDesc, oldDig, oldStamp, oldRess) as res = @@ -251,7 +251,8 @@ if fastCheck then debug (fun () -> Util.msg "cache miss for path %s\n" (Path.toDebugString path)); - let (info, dig) = Os.safeFingerprint currfspath path info optDig in + let (info, dig) = + Os.safeFingerprint ~newfile currfspath path info optDig in (info.Fileinfo.desc, dig, Fileinfo.stamp info, Fileinfo.ressStamp info) in save path res; Modified: trunk/src/fpcache.mli =================================================================== --- trunk/src/fpcache.mli 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/fpcache.mli 2010-07-24 12:19:38 UTC (rev 459) @@ -2,13 +2,14 @@ (* Copyright 1999-2010, Benjamin C. Pierce (see COPYING for details) *) (* Initialize the cache *) -val init : bool -> System.fspath -> unit +val init : bool -> bool -> System.fspath -> unit (* Close the cache file and clear the in-memory cache *) val finish : unit -> unit (* Get the fingerprint of a file, possibly from the cache *) val fingerprint : + ?newfile:bool -> bool -> Fspath.t -> Path.local -> Fileinfo.t -> Os.fullfingerprint option -> Props.t * Os.fullfingerprint * Fileinfo.stamp * Osx.ressStamp Modified: trunk/src/mkProjectInfo.ml =================================================================== --- trunk/src/mkProjectInfo.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/mkProjectInfo.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -6,7 +6,7 @@ let projectName = "unison" let majorVersion = 2 let minorVersion = 43 -let pointVersionOrigin = 454 (* Revision that corresponds to point version 0 *) +let pointVersionOrigin = 455 (* Revision that corresponds to point version 0 *) (* Documentation: This is a program to construct a version of the form Major.Minor.Point, @@ -59,3 +59,4 @@ + Modified: trunk/src/os.ml =================================================================== --- trunk/src/os.ml 2010-07-22 01:42:07 UTC 
(rev 458) +++ trunk/src/os.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -224,39 +224,83 @@ (Fingerprint.file fspath path, Osx.ressFingerprint fspath path info.Fileinfo.osX) +let fastercheckUNSAFE = + Prefs.createBool "fastercheckUNSAFE" + false "!skip computing fingerprints for new files (experts only!)" + ( "THIS FEATURE IS STILL EXPERIMENTAL AND SHOULD BE USED WITH EXTREME CAUTION. " + ^ "\n\n" + ^ "When this flag is set to {\\tt true}, Unison will compute a 'pseudo-" + ^ "fingerprint' the first time it sees a file (either because the file is " + ^ "new or because Unison is running for the first time). This enormously " + ^ "speeds update detection, but it must be used with care, as it can cause " + ^ "Unison to miss conflicts: If " + ^ "a given path in the filesystem contains files on {\\em both} sides that " + ^ "Unison has not yet seen, and if those files have the same length but different " + ^ "contents, then Unison will not notice the presence of a conflict. If, later, one " + ^ "of the files is changed, the changed file will be propagated, overwriting " + ^ "the other. " + ^ "\n\n" + ^ "Moreover, even when the files are initially identical, setting this flag can lead " + ^ "to potentially confusing behavior: " + ^ "if a newly created file is later touched without being modified, Unison will " + ^ "treat this " + ^ "conservatively as a potential change (since it has no record of the earlier " + ^ "contents) and show it as needing to be propagated to the other replica. " + ^ "\n\n" + ^ "Most users should leave this flag off -- the small time savings of not " + ^ "fingerprinting new files is not worth the cost in terms of safety. However, " + ^ "it can be very useful for power users with huge replicas that are known to " + ^ "be already synchronized (e.g., because one replica is a newly created duplicate " + ^ "of the other, or because they have previously been synchronized with Unison but " + ^ "Unison's archives need to be rebuilt). 
In such situations, it is recommended " + ^ "that this flag be set only for the initial run of Unison, so that new archives " + ^ "can be created quickly, and then turned off for normal use.") + +let pseudoFingerprint path size = + (Fingerprint.pseudo path size, Fingerprint.dummy) + +let isPseudoFingerprint (fp,rfp) = + Fingerprint.ispseudo fp + (* FIX: not completely safe under Unix *) (* (with networked file system such as NFS) *) -let safeFingerprint fspath path info optDig = - let rec retryLoop count info optDig optRessDig = - if count = 0 then - raise (Util.Transient - (Printf.sprintf - "Failed to fingerprint file \"%s\": \ - the file keeps on changing" - (Fspath.toPrintString (Fspath.concat fspath path)))) - else - let dig = - match optDig with - None -> Fingerprint.file fspath path - | Some dig -> dig - in - let ressDig = - match optRessDig with - None -> Osx.ressFingerprint fspath path info.Fileinfo.osX - | Some ress -> ress - in - let (info', dataUnchanged, ressUnchanged) = - Fileinfo.unchanged fspath path info in - if dataUnchanged && ressUnchanged then - (info', (dig, ressDig)) +let safeFingerprint ?(newfile=false) fspath path info optDig = + if Prefs.read fastercheckUNSAFE && newfile then begin + debug (fun()-> Util.msg "skipping initial fingerprint of %s\n" + (Fspath.toDebugString (Fspath.concat fspath path))); + let info = Fileinfo.get false fspath path in + (info, pseudoFingerprint path (Props.length info.Fileinfo.desc)) + end else + let rec retryLoop count info optDig optRessDig = + if count = 0 then + raise (Util.Transient + (Printf.sprintf + "Failed to fingerprint file \"%s\": \ + the file keeps on changing" + (Fspath.toPrintString (Fspath.concat fspath path)))) else - retryLoop (count - 1) info' - (if dataUnchanged then Some dig else None) - (if ressUnchanged then Some ressDig else None) - in - retryLoop 10 info (* Maximum retries: 10 times *) - (match optDig with None -> None | Some (d, _) -> Some d) - None + let dig = + match optDig with + None 
-> Fingerprint.file fspath path + | Some dig -> dig + in + let ressDig = + match optRessDig with + None -> Osx.ressFingerprint fspath path info.Fileinfo.osX + | Some ress -> ress + in + let (info', dataUnchanged, ressUnchanged) = + Fileinfo.unchanged fspath path info in + if dataUnchanged && ressUnchanged then + (info', (dig, ressDig)) + else + retryLoop (count - 1) info' + (if dataUnchanged then Some dig else None) + (if ressUnchanged then Some ressDig else None) + in + retryLoop 10 info (* Maximum retries: 10 times *) + (match optDig with None -> None | Some (d, _) -> Some d) + None let fullfingerprint_to_string (fp,rfp) = Printf.sprintf "(%s,%s)" (Fingerprint.toString fp) (Fingerprint.toString rfp) @@ -274,13 +318,7 @@ let fullfingerprintEqual (fp, rfp) (fp', rfp') = Fingerprint.equal fp fp' && Fingerprint.equal rfp rfp' -let pseudoFingerprint size = - (Fingerprint.pseudo size, Fingerprint.dummy) -let isPseudoFingerprint (fp,rfp) = - Fingerprint.ispseudo fp - - (*****************************************************************************) (* UNISON DIRECTORY *) (*****************************************************************************) Modified: trunk/src/os.mli =================================================================== --- trunk/src/os.mli 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/os.mli 2010-07-24 12:19:38 UTC (rev 459) @@ -36,6 +36,7 @@ (* Use this function if the file may change during fingerprinting *) val safeFingerprint : + ?newfile:bool -> (* true if this file is new; false by default *) Fspath.t -> Path.local -> (* coordinates of file to fingerprint *) Fileinfo.t -> (* old fileinfo *) fullfingerprint option -> (* fingerprint corresponding to the old fileinfo *) @@ -46,10 +47,12 @@ Fileinfo.t -> (* old fileinfo *) fullfingerprint (* current fingerprint *) +(* BCP: Not sure this needs to be exported val pseudoFingerprint : Uutil.Filesize.t -> (* size of file to "fingerprint" *) fullfingerprint (* pseudo-fingerprint of this file 
(containing just the file's length) *) +*) val isPseudoFingerprint : fullfingerprint -> bool Modified: trunk/src/stasher.ml =================================================================== --- trunk/src/stasher.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/stasher.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -484,6 +484,9 @@ let rec aux_find i = let path = makeBackupName path i in if Os.exists dir path && + (* FIX: should check that the existing file has the same size, to + avoid computing the fingerprint if it is obviously going to be + different... *) (let dig = Os.fingerprint dir path (Fileinfo.get false dir path) in dig = fingerprint) then begin Modified: trunk/src/test.ml =================================================================== --- trunk/src/test.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/test.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -340,6 +340,31 @@ ); if bothRootsLocal then + runtest "fastercheckUNSAFE 1" ["fastercheckUNSAFE = true"] (fun() -> + put R1 (Dir []); put R2 (Dir []); sync(); + (* Create a file on both sides with different contents *) + put R1 (Dir ["x", File "foo"]); + put R2 (Dir ["x", File "bar"]); sync(); + check "1a" R1 (Dir ["x", File "foo"]); + check "1b" R2 (Dir ["x", File "bar"]); + (* Change contents on one side and see that we do NOT get a conflict (!) 
*) + put R1 (Dir ["x", File "newcontents"]); sync(); + check "2a" R1 (Dir ["x", File "newcontents"]); + check "2b" R2 (Dir ["x", File "newcontents"]); + (* Start again *) + put R1 (Dir []); put R2 (Dir []); sync(); + (* Create a file on both sides with different contents *) + put R1 (Dir ["x", File "foo"]); + put R2 (Dir ["x", File "bar"]); sync(); + (* Change contents without changing size and check that change is propagated *) + put R1 (Dir ["x", File "f00"]); sync(); + check "3a" R1 (Dir ["x", File "f00"]); + check "3b" R2 (Dir ["x", File "f00"]); + ); + + raise (Util.Fatal "Skipping some tests -- remove me!\n"); + + if bothRootsLocal then runtest "backups 1 (local)" ["backup = Name *"] (fun() -> put R1 (Dir []); put R2 (Dir []); sync(); (* Create a file and a directory *) Modified: trunk/src/ubase/trace.ml =================================================================== --- trunk/src/ubase/trace.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/ubase/trace.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -48,7 +48,7 @@ let debugtimes = Prefs.createBool "debugtimes" false "*annotate debugging messages with timestamps" "" - + let runningasserver = ref false let debugging() = (Prefs.read debugmods) <> [] Modified: trunk/src/update.ml =================================================================== --- trunk/src/update.ml 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/update.ml 2010-07-24 12:19:38 UTC (rev 459) @@ -23,6 +23,15 @@ let debugalias = Trace.debug "rootalias" let debugignore = Trace.debug "ignore" +let ignoreArchives = + Prefs.createBool "ignorearchives" false + "!ignore existing archive files" + ("When this preference is set, Unison will ignore any existing " + ^ "archive files and behave as though it were being run for the first " + ^ "time on these replicas. 
It is " + ^ "not a good idea to set this option in a profile: it is intended for " + ^ "command-line use.") + (*****************************************************************************) (* ARCHIVE DATATYPE *) (*****************************************************************************) @@ -652,7 +661,7 @@ let populateCacheFromArchive fspath arch = let (cacheFilename, _) = archiveName fspath FPCache in let cacheFile = Os.fileInUnisonDir cacheFilename in - Fpcache.init true cacheFile; + Fpcache.init true (Prefs.read ignoreArchives) cacheFile; populateCacheFromArchiveRec Path.empty arch; Fpcache.finish () @@ -660,15 +669,6 @@ (* Loading archives *) (*************************************************************************) -let ignoreArchives = - Prefs.createBool "ignorearchives" false - "!ignore existing archive files" - ("When this preference is set, Unison will ignore any existing " - ^ "archive files and behave as though it were being run for the first " - ^ "time on these replicas. 
It is " - ^ "not a good idea to set this option in a profile: it is intended for " - ^ "command-line use.") - let setArchiveData thisRoot fspath (arch, hash, magic, properties) info = let archMode = archiveMode magic in let curMode = (Case.ops ())#modeDesc in @@ -1547,12 +1547,12 @@ currfspath path info archive archDesc archDig archStamp archRess scanInfo | (`FILE, _) -> - debug (fun() -> Util.msg " buildUpdate -> Updated file\n"); + debug (fun() -> Util.msg " buildUpdate -> New file\n"); None, begin showStatusAddLength scanInfo info; let (desc, dig, stamp, ress) = - Fpcache.fingerprint + Fpcache.fingerprint ~newfile:true scanInfo.fastCheck currfspath path info None in Xferhint.insertEntry currfspath path dig; Updates (File (desc, ContentsUpdated (dig, stamp, ress)), @@ -1792,7 +1792,7 @@ in let (cacheFilename, _) = archiveName fspath FPCache in let cacheFile = Os.fileInUnisonDir cacheFilename in - Fpcache.init scanInfo.fastCheck cacheFile; + Fpcache.init scanInfo.fastCheck (Prefs.read ignoreArchives) cacheFile; let (archive, updates) = Safelist.fold_right (fun path (arch, upd) -> @@ -2203,10 +2203,11 @@ (Format.sprintf "The properties of file %s have been modified\n" (Path.toString path)) | Updates (File (desc, ContentsUpdated (_, _, ress)), - Previous (`FILE, oldDesc, _, oldRess)) -> - reportUpdate (fastCheckMiss path desc ress oldDesc oldRess) - (Format.sprintf "The contents of file %s has been modified\n" - (Path.toString path)) + Previous (`FILE, oldDesc, oldFp, oldRess)) -> + if not (Os.isPseudoFingerprint oldFp) then + reportUpdate (fastCheckMiss path desc ress oldDesc oldRess) + (Format.sprintf "The contents of file %s have been modified\n" + (Path.toString path)) | Updates (File (_, ContentsUpdated _), _) -> reportUpdate false (Format.sprintf "The file %s has been created\n" @@ -2247,8 +2248,7 @@ let scanInfo = { fastCheck = false; dirFastCheck = false; dirStamp = Props.changedDirStamp; - showStatus = false } - in + showStatus = false } in let (_, uiNew) = 
buildUpdateRec archive fspath localPath scanInfo in markPossiblyUpdatedRec fspath pathInArchive uiNew; explainUpdate pathInArchive uiNew; Modified: trunk/src/xferhint.mli =================================================================== --- trunk/src/xferhint.mli 2010-07-22 01:42:07 UTC (rev 458) +++ trunk/src/xferhint.mli 2010-07-24 12:19:38 UTC (rev 459) @@ -2,7 +2,7 @@ (* Copyright 1999-2010, Benjamin C. Pierce (see COPYING for details) *) (* This module maintains a cache that can be used to map - an Os.fingerprint to a (Fspath.t * Path.t) naming a file that *may* + an Os.fullfingerprint to a (Fspath.t * Path.t) naming a file that *may* (if we are lucky) have this fingerprint. The cache is not guaranteed to be reliable -- the things it returns are only hints, and must be double-checked before they are used (to optimize file transfers). *) From bcpierce at seas.upenn.edu Sat Jul 24 08:21:35 2010 From: bcpierce at seas.upenn.edu (bcpierce@seas.upenn.edu) Date: Sat, 24 Jul 2010 08:21:35 -0400 Subject: [Unison-hackers] [unison-svn] r460 - trunk/src Message-ID: <201007241221.o6OCLZci018403@yaws.seas.upenn.edu> Author: bcpierce Date: 2010-07-24 08:21:34 -0400 (Sat, 24 Jul 2010) New Revision: 460 Modified: trunk/src/RECENTNEWS trunk/src/mkProjectInfo.ml Log: * Bump revisionString -- not sure why this isn't happening automatically. Modified: trunk/src/RECENTNEWS =================================================================== --- trunk/src/RECENTNEWS 2010-07-24 12:19:38 UTC (rev 459) +++ trunk/src/RECENTNEWS 2010-07-24 12:21:34 UTC (rev 460) @@ -1,3 +1,8 @@ +CHANGES FROM VERSION 2.43.0 + +* Bump revisionString -- not sure why this isn't happening automatically. 
+ +------------------------------- CHANGES FROM VERSION 2.43.-30 * Experimental implementation of a new "faster check" mode for update Modified: trunk/src/mkProjectInfo.ml =================================================================== --- trunk/src/mkProjectInfo.ml 2010-07-24 12:19:38 UTC (rev 459) +++ trunk/src/mkProjectInfo.ml 2010-07-24 12:21:34 UTC (rev 460) @@ -42,7 +42,7 @@ (* ---------------------------------------------------------------------- *) (* You shouldn't need to edit below. *) -let revisionString = "$Rev: 425$";; +let revisionString = "$Rev: 455$";; let pointVersion = Scanf.sscanf revisionString "$Rev: %d " (fun x -> x) - pointVersionOrigin;; @@ -60,3 +60,4 @@ + From bcpierce at seas.upenn.edu Sat Jul 24 08:40:06 2010 From: bcpierce at seas.upenn.edu (bcpierce@seas.upenn.edu) Date: Sat, 24 Jul 2010 08:40:06 -0400 Subject: [Unison-hackers] [unison-svn] r461 - trunk/src Message-ID: <201007241240.o6OCe6hC018720@yaws.seas.upenn.edu> Author: bcpierce Date: 2010-07-24 08:40:06 -0400 (Sat, 24 Jul 2010) New Revision: 461 Modified: trunk/src/RECENTNEWS trunk/src/mkProjectInfo.ml Log: * See if we can get revisionString to update automatically now... Modified: trunk/src/RECENTNEWS =================================================================== --- trunk/src/RECENTNEWS 2010-07-24 12:21:34 UTC (rev 460) +++ trunk/src/RECENTNEWS 2010-07-24 12:40:06 UTC (rev 461) @@ -1,5 +1,11 @@ CHANGES FROM VERSION 2.43.0 +* See if we can get revisionString to update automatically now... + + +------------------------------- +CHANGES FROM VERSION 2.43.0 + * Bump revisionString -- not sure why this isn't happening automatically. 
------------------------------- Modified: trunk/src/mkProjectInfo.ml =================================================================== --- trunk/src/mkProjectInfo.ml 2010-07-24 12:21:34 UTC (rev 460) +++ trunk/src/mkProjectInfo.ml 2010-07-24 12:40:06 UTC (rev 461) @@ -42,7 +42,7 @@ (* ---------------------------------------------------------------------- *) (* You shouldn't need to edit below. *) -let revisionString = "$Rev: 455$";; +let revisionString = "$Rev$";; let pointVersion = Scanf.sscanf revisionString "$Rev: %d " (fun x -> x) - pointVersionOrigin;; @@ -51,13 +51,3 @@ Printf.printf "VERSION=%d.%d.%d\n" majorVersion minorVersion pointVersion;; Printf.printf "NAME=%s\n" projectName;; - - - - - - - - - - From Jerome.Vouillon at pps.jussieu.fr Sat Jul 24 09:30:37 2010 From: Jerome.Vouillon at pps.jussieu.fr (Jerome Vouillon) Date: Sat, 24 Jul 2010 15:30:37 +0200 Subject: [Unison-hackers] Experimental "faster check" mode In-Reply-To: <201007241219.o6OCJc0S018357@yaws.seas.upenn.edu> References: <201007241219.o6OCJc0S018357@yaws.seas.upenn.edu> Message-ID: <20100724133036.GA11298@pps.jussieu.fr> This is great! A few remarks: - *Don't* store pseudo-fingerprints in the fingerprint cache. The cache contents should reflect what is known about file contents, and thus not depend on Unison settings. Pseudo-fingerprints should be handled at a higher level. In particular, the fingerprint cache provides a way to rebuild an archive rapidly, without having to rescan all files. Your code is breaking that. - You should check that the Xferhint code still works properly. I suspect we don't want to store pseudo-fingerprints there either... - If I read the code correctly, when "fastercheckUNSAFE" is set to true, any new file will fail to transfer. This is the safe thing to do (though the failure happen at a very late stage, once the file contents has been transferred) but is very restrictive. 
I explained in my previous mail how we can perform a transfer safely in this case too, but a protocol change is needed. (That would be useful when one replica is initially empty: Unison would be able to start transferring files right away.) -- Jerome From bcpierce at cis.upenn.edu Sat Jul 24 09:44:25 2010 From: bcpierce at cis.upenn.edu (Benjamin C. Pierce) Date: Sat, 24 Jul 2010 09:44:25 -0400 Subject: [Unison-hackers] Experimental "faster check" mode In-Reply-To: <20100724133036.GA11298@pps.jussieu.fr> References: <201007241219.o6OCJc0S018357@yaws.seas.upenn.edu> <20100724133036.GA11298@pps.jussieu.fr> Message-ID: Thanks for the comments, Jérome! > - *Don't* store pseudo-fingerprints in the fingerprint cache. The > cache contents should reflect what is known about file contents, and > thus not depend on Unison settings. Pseudo-fingerprints should be > handled at a higher level. In particular, the fingerprint cache > provides a way to rebuild an archive rapidly, without having to > rescan all files. Your code is breaking that. The test if fastCheck && not ignorearchives then begin at the beginning of Fpcache.init was intended to prevent pseudo-fingerprints from getting into the cache (by turning off caching). Would it be better to change Fpcache.save to drop individual fingerprints when they are pseudos? > - You should check that the Xferhint code still works properly. > I suspect we don't want to store pseudo-fingerprints there either... Pseudo-fingerprints include the fingerprinted path, so we should never have a hit in the Xferhint table on a pseudo-fp. Still, it is a waste of space. I'll change Xferhint.insertEntry like this: let insertEntry fspath path fp = if Prefs.read xferbycopying && not (Os.isPseudoFingerprint fp) then begin > - If I read the code correctly, when "fastercheckUNSAFE" is set to > true, any new file will fail to transfer.
This is the safe thing to > do (though the failure happen at a very late stage, once the file > contents has been transferred) but is very restrictive. I explained > in my previous mail how we can perform a transfer safely in this > case too, but a protocol change is needed. (That would be useful > when one replica is initially empty: Unison would be able to start > transferring files right away.) Sorry -- I had gotten partway through this case and forgot to finish it. I didn't completely understand your recommendation, since paranoidCheck is called in two places (and neither of them is in checkContentsChangeLocal), but I'll have another look now... - Benjamin From bcpierce at seas.upenn.edu Sat Jul 24 10:07:55 2010 From: bcpierce at seas.upenn.edu (bcpierce@seas.upenn.edu) Date: Sat, 24 Jul 2010 10:07:55 -0400 Subject: [Unison-hackers] [unison-svn] r462 - trunk/src Message-ID: <201007241407.o6OE7tXf020177@yaws.seas.upenn.edu> Author: bcpierce Date: 2010-07-24 10:07:55 -0400 (Sat, 24 Jul 2010) New Revision: 462 Modified: trunk/src/RECENTNEWS trunk/src/mkProjectInfo.ml trunk/src/test.ml trunk/src/xferhint.ml Log: * A small fix suggested by Jerome. Still thinking about what needs to change to get newly created files to transfer without failing. Modified: trunk/src/RECENTNEWS =================================================================== --- trunk/src/RECENTNEWS 2010-07-24 12:40:06 UTC (rev 461) +++ trunk/src/RECENTNEWS 2010-07-24 14:07:55 UTC (rev 462) @@ -1,3 +1,9 @@ +CHANGES FROM VERSION 2.43.6 + +* A small fix suggested by Jerome. Still thinking about what needs to + chance to get newly created files to transfer without failing. + +------------------------------- CHANGES FROM VERSION 2.43.0 * See if we can get revisionString to update automatically now...
Modified: trunk/src/mkProjectInfo.ml =================================================================== --- trunk/src/mkProjectInfo.ml 2010-07-24 12:40:06 UTC (rev 461) +++ trunk/src/mkProjectInfo.ml 2010-07-24 14:07:55 UTC (rev 462) @@ -51,3 +51,4 @@ Printf.printf "VERSION=%d.%d.%d\n" majorVersion minorVersion pointVersion;; Printf.printf "NAME=%s\n" projectName;; + Modified: trunk/src/test.ml =================================================================== --- trunk/src/test.ml 2010-07-24 12:40:06 UTC (rev 461) +++ trunk/src/test.ml 2010-07-24 14:07:55 UTC (rev 462) @@ -351,6 +351,7 @@ put R1 (Dir ["x", File "newcontents"]); sync(); check "2a" R1 (Dir ["x", File "newcontents"]); check "2b" R2 (Dir ["x", File "newcontents"]); + (* Start again *) put R1 (Dir []); put R2 (Dir []); sync(); (* Create a file on both sides with different contents *) @@ -360,6 +361,13 @@ put R1 (Dir ["x", File "f00"]); sync(); check "3a" R1 (Dir ["x", File "f00"]); check "3b" R2 (Dir ["x", File "f00"]); + + (* Start again *) + put R1 (Dir []); put R2 (Dir []); sync(); + (* Create a new file on one side only *) + put R1 (Dir ["x", File "foo"]); sync(); + (* Check that change is propagated *) + check "4" R2 (Dir ["x", File "foo"]); ); raise (Util.Fatal "Skipping some tests -- remove me!\n"); Modified: trunk/src/xferhint.ml =================================================================== --- trunk/src/xferhint.ml 2010-07-24 12:40:06 UTC (rev 461) +++ trunk/src/xferhint.ml 2010-07-24 14:07:55 UTC (rev 462) @@ -56,7 +56,7 @@ None let insertEntry fspath path fp = - if Prefs.read xferbycopying then begin + if Prefs.read xferbycopying && not (Os.isPseudoFingerprint fp) then begin debug (fun () -> Util.msg "insertEntry: fspath=%s, path=%s, fp=%s\n" (Fspath.toDebugString fspath) From Jerome.Vouillon at pps.jussieu.fr Sat Jul 24 10:24:56 2010 From: Jerome.Vouillon at pps.jussieu.fr (Jerome Vouillon) Date: Sat, 24 Jul 2010 16:24:56 +0200 Subject: [Unison-hackers] Experimental "faster check" 
mode In-Reply-To: References: <201007241219.o6OCJc0S018357@yaws.seas.upenn.edu> <20100724133036.GA11298@pps.jussieu.fr> Message-ID: <20100724142456.GA11418@pps.jussieu.fr> On Sat, Jul 24, 2010 at 09:44:25AM -0400, Benjamin C. Pierce wrote: > The test > > if fastCheck && not ignorearchives then begin > > at the beginning of Fpcache.init was intended to prevent > pseudo-fingerprints from getting into the cache (by turning off > caching). But "ignorearchives" is set to true when the "ignorearchives" preference is set, not when the "fastercheckUNSAFE" preference is set... > Would it be better to change Fpcache.save to drop > individual fingeprints when they are pseudos? The check "Prefs.read fastercheckUNSAFE && newfile" should be at the beginning of Fpcache.fingerprint rather than in Os.safeFingerprint. > > - You should check that the Xferhint code still works properly. > > I suspect we don't want to store pseudo-fingerprints there either... > > Pseudo-fingerprints include the fingerprinted path, so we should > never have a hit in the Xferhint table on a pseudo-fp. We should include a fingerprint of the path, not the whole path. Otherwise, we will end up with very large archive files. > Still, it is a waste of space. I'll change Xferhint.insertEntry like this: > > let insertEntry fspath path fp = > if Prefs.read xferbycopying && not (Os.isPseudoFingerprint fp) then begin > > > - If I read the code correctly, when "fastercheckUNSAFE" is set to > > true, any new file will fail to transfer. This is the safe thing to > > do (though the failure happen at a very late stage, once the file > > contents has been transferred) but is very restrictive. I explained > > in my previous mail how we can perform a transfer safely in this > > case too, but a protocol change is needed. (That would be useful > > when one replica is initially empty: Unison would be able to start > > transferring files right away.) > > Sorry -- I had gotten partway though this case and forgot to finish > it.
I didn't completely understand your recommendation, since > paranoidCheck is called in two places (and neither of them is in > checkContentsChangeLocal), but I'll have another look now... At the moment, paranoidCheck returns a status: either Success or Failure (with some explanation of the failure). In Copy.file, checkContentsChange is then called with a different boolean argument depending on the status. The function paranoidCheck could return a fingerprint instead of the explanation when it cannot decide whether the file was correctly transferred. Something like: let paranoidCheck fspathTo pathTo realPathTo desc fp ress = let info = Fileinfo.get false fspathTo pathTo in let fp' = Os.fingerprint fspathTo pathTo info in if fp' <> fp then begin Lwt.return (info, Undecided fp') end else Lwt.return (info, Success) This fingerprint would be passed to checkContentsChangeLocal which computes the fingerprint of the source file and can thus make sure the two fingerprints match. Function checkContentsChangeLocal will have three fingerprints: the one from update detection which may be a pseudo-fingerprint (FPU), and the fingerprints of the source (FPS) and destination (FPD) files. If FPU is not a pseudo-fingerprint, the behavior of the function should remain unchanged. If it is a pseudo-fingerprint and FPS <> FPD, we should report that the file is not correctly transferred. -- Jerome From bcpierce at cis.upenn.edu Wed Jul 28 09:21:30 2010 From: bcpierce at cis.upenn.edu (Benjamin C. Pierce) Date: Wed, 28 Jul 2010 09:21:30 -0400 Subject: [Unison-hackers] Command line arguments and OS X GUI In-Reply-To: <84B1348A-330A-470E-B90C-E3CEB8DAA121@polytechnique.org> References: <4BE3FB04.3030100@strank.info> <4BE40B86.7030609@strank.info> <84B1348A-330A-470E-B90C-E3CEB8DAA121@polytechnique.org> Message-ID: <2BE7D519-46D8-4D9E-9BFA-E3F5B1DE758E@cis.upenn.edu> > Should we try then to fix the GUI, or to change the tutorial?
Seems like fixing the GUI is the right thing to do, and probably pretty easy (for someone that knows their way around that code a little)... - B On Jun 28, 2010, at 9:24 AM, Alan Schmitt wrote: > On 7 mai 2010, at 14:45, Stefan Rank wrote: > >> on Friday 2010-05-07 14:00 Alan Schmitt said the following: >>> On Fri, May 7, 2010 at 1:35 PM, Stefan Rank wrote: >> >>>> The unison binary that can be installed via the menu option of the >>>> macnew GUI (to /usr/bin/unison) calls the internal Unison (capital U) >>>> and accepts command-line arguments here. >>> >>> Yes. And this is where things fail. When I call: >>> /usr/bin/unison a.tmp b.tmp >>> as specified in the tutorial, I get the profile chooser. (The other >>> unison, in my ~/bin directory, is a text only version and works fine.) >>> >>> Note that this has been reported several times by OS X users: some >>> (most?) command line options work, but specifying the roots like above >>> does not. >> >> Yes, you're right. >> (Sorry for not trying exactly what you suggested right at the beginning...) >> >> I just never realised since I am always using profiles, only changing >> options, but not directly specifying roots. >> >> It also accepts the version:: >> >> unison -root a -root b >> >> which has the same problem, but it gives an error if you try:: >> >> unison -root >> /Applications/Unison.app/Contents/MacOS/Unison: option `-root' needs >> an argument. >> >> so the option parsing seems to be ok. >> I would guess the feature is simply missing from the Mac GUI? > > Should we try then to fix the GUI, or to change the tutorial? > > Alan > _______________________________________________ > Unison-hackers mailing list > Unison-hackers at lists.seas.upenn.edu > http://lists.seas.upenn.edu/mailman/listinfo/unison-hackers