% Copyright (C) 2003-2005 David Roundy % % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2, or (at your option) % any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; see the file COPYING. If not, write to % the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, % Boston, MA 02110-1301, USA. \subsection{darcs optimize} \begin{code} module Optimize ( optimize ) where import DarcsUtils ( withCurrentDirectory ) import Monad ( when, unless, liftM ) import Control.Exception ( block ) import Maybe ( isJust ) import Text.Regex ( mkRegex, matchRegex ) import DarcsCommands ( DarcsCommand(..), nodefaults ) import DarcsArguments ( DarcsFlag( Compress, UnCompress, Verbose, ModernizePatches, NoCompress, Reorder, TagName, CheckPoint, Relink, RelinkPristine ), tagname, verbose, checkpoint, reorder_patches, uncompress_nocompress, relink, relink_pristine, sibling, flagsToSiblings, modernize_patches, working_repo_dir, umask_option, ) import RepoPrefs ( defaultrepo ) import Repository ( PatchSet, withRepoLock ) import DarcsRepo ( read_repo, write_inventory, write_checkpoint, am_in_repo, write_patch, ) import PatchInfo ( PatchInfo, just_name, make_filename, human_friendly ) import Patch ( readPatch, gzWritePatch, writePatch, modernize_patch ) import FastPackedString ( gzReadFilePS ) import Depends ( deep_optimize_patchset, slightly_optimize_patchset, get_patches_beyond_tag, get_patches_in_tag, ) import Lock ( maybeRelink, gzWriteAtomicFilePS, writeAtomicFilePS, ) import DarcsUtils ( catchall ) import Printer ( putDocLn, text, ($$) ) import SlurpDirectory ( slurp, list_slurpy_files ) import Pristine ( identifyPristine, slurpPristine, pristineDirectory ) #include "impossible.h" \end{code} \begin{code} optimize_description :: String optimize_description = "Optimize the repository." \end{code} \options{optimize} \haskell{optimize_help} \begin{code} optimize_help :: String optimize_help = "Optimize can help to improve the performance of your repository in a number of cases.\n" \end{code} \begin{code} optimize :: DarcsCommand optimize = DarcsCommand {command_name = "optimize", command_help = optimize_help, command_description = optimize_description, command_extra_args = 0, command_extra_arg_help = [], command_command = optimize_cmd, command_prereq = am_in_repo, command_get_arg_possibilities = return [], command_argdefaults = nodefaults, command_darcsoptions = [checkpoint, uncompress_nocompress, tagname, verbose, working_repo_dir, modernize_patches, reorder_patches, sibling, relink, relink_pristine, umask_option]} \end{code} \begin{code} optimize_cmd :: [DarcsFlag] -> [String] -> IO () optimize_cmd opts _ = withRepoLock opts $ \_ -> do do_reorder opts do_optimize_inventory opts when (CheckPoint `elem` opts) $ do_checkpoint opts when (Compress `elem` opts || UnCompress `elem` opts || ModernizePatches `elem` opts) $ optimize_compression opts when (Relink `elem` opts || (RelinkPristine `elem` opts)) $ do_relink opts putStrLn "Done optimizing!" is_tag :: PatchInfo -> Bool is_tag pinfo = take 4 (just_name pinfo) == "TAG " \end{code} Optimize always writes out a fresh copy of the inventory that minimizes the amount of inventory that need be downloaded when people pull from the repository. Specifically, it breaks up the inventory on the most recent tag. This speeds up most commands when run remotely, both because a smaller file needs to be transfered (only the most recent inventory). It also gives a guarantee that all the patches prior to a given tag are included in that tag, so less commutation and history traversal is needed. This latter issue can become very important in large repositories. \begin{code} do_optimize_inventory :: [DarcsFlag] -> IO () do_optimize_inventory opts = do ps <- read_repo "." when (Verbose `elem` opts) $ putStrLn "Writing out a nice copy of the inventory." write_inventory "." ps \end{code} \begin{options} --checkpoint, --tag \end{options} If you use the \verb!--checkpoint! option, optimize creates a checkpoint patch for a tag. You can specify the tag with the \verb!--tag! option, or just let darcs choose the most recent tag. Note that optimize \verb!--checkpoint! will fail when used on a ``partial'' repository. Also, the tag that is to be checkpointed must not be preceded by any patches that are not included in that tag. If that is the case, no checkpointing is done. The created checkpoint is used by the \verb!--partial! flag to \verb!get! and \verb!check!. This allows for users to retrieve a working repository with limited history with a savings of disk space and bandwidth. \begin{code} do_checkpoint :: [DarcsFlag] -> IO () do_checkpoint opts = do mpi <- get_tag opts case mpi of Nothing -> return () Just pinfo -> do putDocLn $ text "Checkpointing tag:" $$ human_friendly pinfo write_checkpoint pinfo get_tag :: [DarcsFlag] -> IO (Maybe PatchInfo) get_tag [] = do ps <- read_repo "." case filter (is_tag . fst) $ lasts ps of [] -> do putStrLn "There is no tag to checkpoint!" return Nothing ((pinfo,_):_) -> return $ Just pinfo get_tag (TagName t:_) = do ps <- read_repo "." case filter (match_tag t) $ map fst $ lasts ps of (pinfo:_) -> return $ Just pinfo _ -> case filter (match_tag t) $ map fst $ lasts $ deep_optimize_patchset ps of (pinfo:_) -> return $ Just pinfo _ -> do putStr "Cannot checkpoint any tag " putStr $ "matching '"++t++"'\n" return Nothing get_tag (_:fs) = get_tag fs lasts :: [[a]] -> [a] lasts [] = [] lasts (x@(_:_):ls) = last x : lasts ls lasts ([]:ls) = lasts ls \end{code} \begin{code} mymatch :: String -> PatchInfo -> Bool mymatch r = match_name $ matchRegex (mkRegex r) match_name :: (String -> Maybe a) -> PatchInfo -> Bool match_name ch pinfo = isJust $ ch (just_name pinfo) match_tag :: String -> PatchInfo -> Bool match_tag ('^':n) = mymatch $ "^TAG "++n match_tag n = mymatch $ "^TAG .*"++n \end{code} \begin{options} --compress, --dont-compress, --uncompress \end{options} Some compression options are available, and are independent of the \verb!--checkpoint! option. By default the patches in the repository are compressed. These use less disk space, which translates into less bandwidth if the repository is accessed remotely. Note that patches will always have the ``.gz'' extension whether they are compressed or not. You may want to uncompress the patches when you've got enough disk space but are running out of physical memory. If you give the \verb!--compress! option, optimize will compress all the patches in the repository. Similarly, if you give the \verb!--uncompress!, optimize will decompress all the patches in the repository. \verb!--dont-compress! means ``don't compress, but don't uncompress either''. It would be useful if one of the compression options was provided as a default and you wanted to override it. \begin{options} --modernize-patches \end{options} If you provide the \verb!--modernize-patches! argument, darcs will convert obsolete patches into the current darcs format. This affects both the patch contents and the patch formatting. Older versions of darcs formatted the long comments slightly differently, which can cause trouble with third-party tools that wish to parse the darcs patches, although darcs itself still reads the older patches fine. \verb!--modernize-patches! standardizes the formatting of all patches. In addition, \emph{very} old versions of darcs created the ``merger 0.9'' patch type when there were conflicts. This patch type inherently had bugs which could lead to corruption, which is why it was phased out. \verb!--modernize-patches! will convert old ``merger 0.9'' patches into an equivalent change (which will, however, commute differently). \begin{code} optimize_compression :: [DarcsFlag] -> IO () optimize_compression opts = do r <- read_repo "." withCurrentDirectory "_darcs/patches" (sequence_ $ map (do_compress.make_filename.fst) $ concat r) where wps = if Compress `elem` opts then gzWriteAtomicFilePS else writeAtomicFilePS writeP = if NoCompress `elem` opts || UnCompress `elem` opts then writePatch else gzWritePatch do_compress f = if ModernizePatches `elem` opts then (do contents <- gzReadFilePS f case readPatch contents of Nothing -> return () Just (p,_) -> writeP f $ modernize_patch p) `catchall` return () else (gzReadFilePS f >>= wps f) `catchall` return () \end{code} \begin{options} --relink \end{options} The \verb|--relink| and \verb|--relink-pristine| options cause Darcs to relink files from a sibling. See Section \ref{disk-usage}. \begin{code} do_relink :: [DarcsFlag] -> IO () do_relink opts = do some_siblings <- return (flagsToSiblings opts) defrepo <- defaultrepo "" [] siblings <- return (some_siblings ++ defrepo) if (siblings == []) then putStrLn "No siblings -- no relinking done." else do when (Relink `elem` opts) $ do putVerbose "Relinking patches..." patches <- (liftM list_slurpy_files) (slurp "_darcs/patches") maybeRelinkFiles siblings patches "_darcs/patches" when (RelinkPristine `elem` opts) $ do pristine <- identifyPristine case (pristineDirectory pristine) of (Just d) -> do putVerbose "Relinking pristine tree..." c <- (liftM fromJust) (slurpPristine pristine) maybeRelinkFiles siblings (list_slurpy_files c) d Nothing -> return () putVerbose "Done relinking." return () return () where putVerbose s = when (Verbose `elem` opts) (putStrLn s) maybeRelinkFiles :: [String] -> [String] -> String -> IO () maybeRelinkFiles src dst dir = mapM_ (maybeRelinkFile src) (map ((dir ++ "/") ++) dst) maybeRelinkFile :: [String] -> String -> IO () maybeRelinkFile [] _ = return () maybeRelinkFile (h:t) f = do done <- maybeRelink (h ++ "/" ++ f) f unless done $ maybeRelinkFile t f return () \end{code} \begin{options} --reorder-patches \end{options} The \verb|--reorder-patches| option causes Darcs to create an optimal ordering of its internal patch inventory. This may help to produce shorter `context' lists when sending patches, and may improve performance for some other operations as well. You should not run \verb!--reorder-patches! on a repository from which someone may be simultaneously pulling or getting, as this could lead to repository corruption. \begin{code} do_reorder :: [DarcsFlag] -> IO () do_reorder opts | not (Reorder `elem` opts) = return () do_reorder opts = do when (Verbose `elem` opts) $ putStrLn "Reordering the inventory." psnew <- choose_order `liftM` read_repo "." block $ do write_patchset opts psnew write_inventory "." psnew choose_order :: PatchSet -> PatchSet choose_order ps | isJust last_tag = case slightly_optimize_patchset $ get_patches_in_tag lt ps of ([t]:pps) -> case get_patches_beyond_tag lt ps of [p] -> (p++[t]) : pps _ -> impossible _ -> impossible where last_tag = case filter is_tag $ map fst $ concat ps of (t:_) -> Just t _ -> Nothing lt = fromJust last_tag choose_order ps = ps write_patchset :: [DarcsFlag] -> PatchSet -> IO () write_patchset opts ps = let wp Nothing = return () wp (Just p) = do write_patch opts p return () in mapM_ (wp . snd) $ concat ps \end{code}