From a15318ce020c41ef60f36ec408c5518c943eabb7 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Mon, 1 Feb 2016 10:10:35 +0000 Subject: [PATCH 1/3] Add get{Source,Installed}PackagesMonitorFiles Re-export getInstalledPackagesMonitorFiles from Cabal lib and add getSourcePackagesMonitorFiles locally to D.C.IndexUtils. These are for tracking changes to these bits of the environment, so that it's possible for us to recompute things that depend on them. --- cabal-install/Distribution/Client/IndexUtils.hs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cabal-install/Distribution/Client/IndexUtils.hs b/cabal-install/Distribution/Client/IndexUtils.hs index 042ee60976d..95545fc8b6f 100644 --- a/cabal-install/Distribution/Client/IndexUtils.hs +++ b/cabal-install/Distribution/Client/IndexUtils.hs @@ -17,7 +17,9 @@ module Distribution.Client.IndexUtils ( getIndexFileAge, getInstalledPackages, + Configure.getInstalledPackagesMonitorFiles, getSourcePackages, + getSourcePackagesMonitorFiles, Index(..), PackageEntry(..), @@ -52,7 +54,7 @@ import Distribution.Simple.Compiler import Distribution.Simple.Program ( ProgramConfiguration ) import qualified Distribution.Simple.Configure as Configure - ( getInstalledPackages ) + ( getInstalledPackages, getInstalledPackagesMonitorFiles ) import Distribution.ParseUtils ( ParseResult(..) ) import Distribution.Version @@ -204,6 +206,13 @@ readRepoIndex verbosity repoCtxt repo = getIndexFileAge :: Repo -> IO Double getIndexFileAge repo = getFileAge $ repoLocalDir repo "00-index.tar" +-- | A set of files (or directories) that can be monitored to detect when +-- there might have been a change in the source packages. +-- +getSourcePackagesMonitorFiles :: [Repo] -> [FilePath] +getSourcePackagesMonitorFiles repos = + [ repoLocalDir repo "00-index.cache" + | repo <- repos ] -- | It is not necessary to call this, as the cache will be updated when the -- index is read normally. However you can do the work earlier if you like. From f35e05df7f8c9ddade57c5ab752d7410be527fa8 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Mon, 1 Feb 2016 18:48:34 +0000 Subject: [PATCH 2/3] New infrastructure for tracking changes in files and values A FileMonitor is an abstraction for monitoring the status of files, as well as changes in an in-memory value. Files can be tracked by file modification time, or mod time plus content. We can also track files that are expected not to exist (to help implement search paths). We can also have file globs. The abstraction is useful for re-running actions when input files or values change. This pattern is captured by the Rebuild monad. This adds a dependency on the hashable package (used by unordered-containers). If this is a problem we can extract just the hash function we need. This is not used yet, so there's a temporary import just to make sure it gets compiled. --- .../Distribution/Client/FileMonitor.hs | 888 ++++++++++++++++++ cabal-install/Distribution/Client/Glob.hs | 106 +++ .../Distribution/Client/RebuildMonad.hs | 112 +++ cabal-install/Main.hs | 4 + cabal-install/cabal-install.cabal | 4 + 5 files changed, 1114 insertions(+) create mode 100644 cabal-install/Distribution/Client/FileMonitor.hs create mode 100644 cabal-install/Distribution/Client/Glob.hs create mode 100644 cabal-install/Distribution/Client/RebuildMonad.hs diff --git a/cabal-install/Distribution/Client/FileMonitor.hs b/cabal-install/Distribution/Client/FileMonitor.hs new file mode 100644 index 00000000000..5389503e2cb --- /dev/null +++ b/cabal-install/Distribution/Client/FileMonitor.hs @@ -0,0 +1,888 @@ +{-# LANGUAGE CPP, DeriveGeneric, DeriveFunctor, GeneralizedNewtypeDeriving, + NamedFieldPuns, BangPatterns #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | An abstraction to help with re-running actions when files or other +-- input values they depend on have changed. +-- +module Distribution.Client.FileMonitor ( + + -- * Declaring files to monitor + MonitorFilePath(..), + FilePathGlob(..), + monitorFileSearchPath, + monitorFileHashedSearchPath, + + -- * Creating and checking sets of monitored files + FileMonitor(..), + newFileMonitor, + MonitorChanged(..), + MonitorChangedReason(..), + checkFileMonitorChanged, + updateFileMonitor, + + matchFileGlob, + ) where + + +#if MIN_VERSION_containers(0,5,0) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +#else +import Data.Map (Map) +import qualified Data.Map as Map +#endif +import qualified Data.ByteString.Lazy as BS +import Distribution.Compat.Binary +import qualified Distribution.Compat.Binary as Binary +#if !MIN_VERSION_base(4,8,0) +import Data.Traversable (traverse) +#endif +import qualified Data.Hashable as Hashable +import Data.List (sort) +#if MIN_VERSION_directory(1,2,0) +import Data.Time (UTCTime(..), Day(..)) +#else +import System.Time (ClockTime(..)) +#endif + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif +import Control.Monad +import Control.Monad.Trans (MonadIO, liftIO) +import Control.Monad.State (StateT) +import qualified Control.Monad.State as State +import Control.Monad.Except (ExceptT, runExceptT, throwError) +import Control.Exception + +import Distribution.Text +import Distribution.Compat.ReadP ((<++)) +import qualified Distribution.Compat.ReadP as ReadP +import qualified Text.PrettyPrint as Disp + +import Distribution.Client.Glob +import Distribution.Simple.Utils (writeFileAtomic) +import Distribution.Client.Utils (mergeBy, MergeResult(..)) + +import System.FilePath +import System.Directory +import System.IO +import System.IO.Error +import GHC.Generics (Generic) + + +------------------------------------------------------------------------------ +-- Types for specifying files to monitor +-- + + +-- | A description of a file (or set of files) to monitor for changes. +-- +-- All file paths here are relative to a common directory (e.g. project root). +-- +data MonitorFilePath = + + -- | Monitor a single file for changes, based on its modification time. + -- The monitored file is considered to have changed if it no longer + -- exists or if its modification time has changed. + -- + MonitorFile !FilePath + + -- | Monitor a single file for changes, based on its modification time + -- and content hash. The monitored file is considered to have changed if + -- it no longer exists or if its modification time and content hash have + -- changed. + -- + | MonitorFileHashed !FilePath + + -- | Monitor a single non-existent file for changes. The monitored file + -- is considered to have changed if it exists. + -- + | MonitorNonExistentFile !FilePath + + -- | Monitor a set of files identified by a file glob. The monitored glob + -- is considered to have changed if the set of files matching the glob + -- changes (i.e. creations or deletions), or if the modification time and + -- content hash of any matching file has changed. + -- + | MonitorFileGlob !FilePathGlob + -- Note: currently file globs always use mtime+hash, so they're the + -- equivalent of MonitorFileHashed above. If we need globed files with + -- only mtime then it's perfectly ok to add it. + + deriving (Show, Generic) + +instance Binary MonitorFilePath + +-- | A file path specified by globbing +-- +data FilePathGlob + = GlobDir !Glob !FilePathGlob + | GlobFile !Glob + deriving (Eq, Show, Generic) + +instance Binary FilePathGlob + +-- | Creates a list of files to monitor when you search for a file which +-- unsuccessfully looked in @notFoundAtPaths@ before finding it at +-- @foundAtPath@. +monitorFileSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] +monitorFileSearchPath notFoundAtPaths foundAtPath = + MonitorFile foundAtPath + : map MonitorNonExistentFile notFoundAtPaths + +-- | Similar to 'monitorFileSearchPath', but also instructs us to +-- monitor the hash of the found file. +monitorFileHashedSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] +monitorFileHashedSearchPath notFoundAtPaths foundAtPath = + MonitorFileHashed foundAtPath + : map MonitorNonExistentFile notFoundAtPaths + + +------------------------------------------------------------------------------ +-- Implementation types, files status +-- + +-- | The state necessary to determine whether a set of monitored +-- files has changed. It consists of two parts: a set of specific +-- files to be monitored (index by their path), and a list of +-- globs, which monitor may files at once. +data MonitorStateFileSet + = MonitorStateFileSet !(Map FilePath MonitorStateFile) + ![MonitorStateGlob] + deriving Show + +type Hash = Int +#if MIN_VERSION_directory(1,2,0) +type ModTime = UTCTime +#else +type ModTime = ClockTime +#endif + +-- | The state necessary to determine whether a monitored file has changed. +-- +-- This covers all the cases of 'MonitorFilePath' except for globs which is +-- covered separately by 'MonitorStateGlob'. +-- +data MonitorStateFile + = MonitorStateFile !ModTime -- ^ cached file mtime + | MonitorStateFileHashed !ModTime !Hash -- ^ cached mtime and content hash + | MonitorStateFileNonExistent + + -- | These two are to deal with the situation where we've been asked + -- to monitor a file that's expected to exist, but when we come to + -- check it's status, it no longer exists. + | MonitorStateFileGone + | MonitorStateFileHashGone + deriving (Show, Generic) + +instance Binary MonitorStateFile + +-- | The state necessary to determine whether the files matched by a globbing +-- match have changed. +-- +data MonitorStateGlob + = MonitorStateGlobDirs + !Glob !FilePathGlob + !ModTime + ![(FilePath, MonitorStateGlob)] -- invariant: sorted + + | MonitorStateGlobFiles + !Glob + !ModTime + ![(FilePath, ModTime, Hash)] -- invariant: sorted + deriving (Show, Generic) + +instance Binary MonitorStateGlob + +-- | We can build a 'MonitorStateFileSet' from a set of 'MonitorFilePath' by +-- inspecting the state of the file system, and we can go in the reverse +-- direction by just forgetting the extra info. +-- +reconstructMonitorFilePaths :: MonitorStateFileSet -> [MonitorFilePath] +reconstructMonitorFilePaths (MonitorStateFileSet singlePaths globPaths) = + Map.foldrWithKey (\k x r -> getSinglePath k x : r) + (map getGlobPath globPaths) + singlePaths + where + getSinglePath filepath monitorState = + case monitorState of + MonitorStateFile{} -> MonitorFile filepath + MonitorStateFileHashed{} -> MonitorFileHashed filepath + MonitorStateFileNonExistent -> MonitorNonExistentFile filepath + MonitorStateFileGone -> MonitorFile filepath + MonitorStateFileHashGone -> MonitorFileHashed filepath + + getGlobPath (MonitorStateGlobDirs glob globs _ _) = + MonitorFileGlob (GlobDir glob globs) + getGlobPath (MonitorStateGlobFiles glob _ _) = + MonitorFileGlob (GlobFile glob) + +------------------------------------------------------------------------------ +-- Checking the status of monitored files +-- + +-- | A monitor for detecting changes to a set of files. It can be used to +-- efficiently test if any of a set of files (specified individually or by +-- glob patterns) has changed since some snapshot. In addition, it also checks +-- for changes in a value (of type @a@), and when there are no changes in +-- either it returns a saved value (of type @b@). +-- +-- The main use case looks like this: suppose we have some expensive action +-- that depends on certain pure inputs and reads some set of files, and +-- produces some pure result. We want to avoid re-running this action when it +-- would produce the same result. So we need to monitor the files the action +-- looked at, the other pure input values, and we need to cache the result. +-- Then at some later point, if the input value didn't change, and none of the +-- files changed, then we can re-use the cached result rather than re-running +-- the action. +-- +-- This can be achieved using a 'FileMonitor'. Each 'FileMonitor' instance +-- saves state in a disk file, so the file for that has to be specified, +-- making sure it is unique. The pattern is to use 'checkFileMonitorChanged' +-- to see if there's been any change. If there is, re-run the action, keeping +-- track of the files, then use 'updateFileMonitor' to record the current +-- set of files to monitor, the current input value for the action, and the +-- result of the action. +-- +-- The typical occurrence of this pattern is captured by 'rerunIfChanged' +-- and the 'Rebuild' monad. More complicated cases may need to use +-- 'checkFileMonitorChanged' and 'updateFileMonitor' directly. +-- +data FileMonitor a b + = FileMonitor { + + -- | The file where this 'FileMonitor' should store its state. + -- + fileMonitorCacheFile :: FilePath, + + -- | Compares a new cache key with old one to determine if a + -- corresponding cached value is still valid. + -- + -- Typically this is just an equality test, but in some + -- circumstances it can make sense to do things like subset + -- comparisons. + -- + -- The first arg is the new value, the second is the old cached value. + -- + fileMonitorKeyValid :: a -> a -> Bool, + + -- | When this mode is enabled, if 'checkFileMonitorChanged' returns + -- 'MonitoredValueChanged' then we have the guarantee that no files + -- changed, that the value change was the only change. In the default + -- mode no such guarantee is provided which is slightly faster. + -- + fileMonitorCheckIfOnlyValueChanged :: Bool + } + +-- | Define a new file monitor. +-- +-- It's best practice to define file monitor values once, and then use the +-- same value for 'checkFileMonitorChanged' and 'updateFileMonitor' as this +-- ensures you get the same types @a@ and @b@ for reading and writing. +-- +-- The path of the file monitor itself must be unique because it keeps state +-- on disk and these would clash. +-- +newFileMonitor :: Eq a => FilePath -- ^ The file to cache the state of the + -- file monitor. Must be unique. + -> FileMonitor a b +newFileMonitor path = FileMonitor path (==) False + +-- | The result of 'checkFileMonitorChanged': either the monitored files or +-- value changed (and it tells us which it was) or nothing changed and we get +-- the cached result. +-- +data MonitorChanged a b = + -- | The monitored files and value did not change. The cached result is + -- @b@. + -- + -- The set of monitored files is also returned. This is useful + -- for composing or nesting 'FileMonitor's. + MonitorUnchanged b [MonitorFilePath] + + -- | The monitor found that something changed. The reason is given. + -- + | MonitorChanged (MonitorChangedReason a) + deriving Show + +-- | What kind of change 'checkFileMonitorChanged' detected. +-- +data MonitorChangedReason a = + + -- | One of the files changed (existence, file type, mtime or file + -- content, depending on the 'MonitorFilePath' in question) + MonitoredFileChanged FilePath + + -- | The pure input value changed. + -- + -- The previous cached key value is also returned. This is sometimes + -- useful when using a 'fileMonitorKeyValid' function that is not simply + -- '(==)', when invalidation can be partial. In such cases it can make + -- sense to 'updateFileMonitor' with a key value that's a combination of + -- the new and old (e.g. set union). + | MonitoredValueChanged a + + -- | There was no saved monitor state, cached value etc. Ie the file + -- for the 'FileMonitor' does not exist. + | MonitorFirstRun + + -- | There was existing state, but we could not read it. This typically + -- happens when the code has changed compared to an existing 'FileMonitor' + -- cache file and type of the input value or cached value has changed such + -- that we cannot decode the values. This is completely benign as we can + -- treat is just as if there were no cache file and re-run. + | MonitorCorruptCache + deriving (Show, Functor) + +-- | Test if the input value or files monitored by the 'FileMonitor' have +-- changed. If not, return the cached value. +-- +-- See 'FileMonitor' for a full explanation. +-- +checkFileMonitorChanged + :: (Binary a, Binary b) + => FileMonitor a b -- ^ cache file path + -> FilePath -- ^ root directory + -> a -- ^ guard or key value + -> IO (MonitorChanged a b) -- ^ did the key or any paths change? +checkFileMonitorChanged + monitor@FileMonitor { fileMonitorKeyValid, + fileMonitorCheckIfOnlyValueChanged } + root currentKey = + + -- Consider it a change if the cache file does not exist, + -- or we cannot decode it. Sadly ErrorCall can still happen, despite + -- using decodeFileOrFail, e.g. Data.Char.chr errors + + handleDoesNotExist (MonitorChanged MonitorFirstRun) $ + handleErrorCall (MonitorChanged MonitorCorruptCache) $ + readCacheFile monitor + >>= either (\_ -> return (MonitorChanged MonitorCorruptCache)) + checkStatusCache + + where + checkStatusCache (cachedFileStatus, cachedKey, cachedResult) = do + change <- checkForChanges + case change of + Just reason -> return (MonitorChanged reason) + Nothing -> return (MonitorUnchanged cachedResult monitorFiles) + where monitorFiles = reconstructMonitorFilePaths cachedFileStatus + where + -- In fileMonitorCheckIfOnlyValueChanged mode we want to guarantee that + -- if we return MonitoredValueChanged that only the value changed. + -- We do that by checkin for file changes first. Otherwise it makes + -- more sense to do the cheaper test first. + checkForChanges + | fileMonitorCheckIfOnlyValueChanged + = checkFileChange cachedFileStatus cachedKey cachedResult + `mplusMaybeT` + checkValueChange cachedKey + + | otherwise + = checkValueChange cachedKey + `mplusMaybeT` + checkFileChange cachedFileStatus cachedKey cachedResult + + mplusMaybeT :: Monad m => m (Maybe a) -> m (Maybe a) -> m (Maybe a) + mplusMaybeT ma mb = do + mx <- ma + case mx of + Nothing -> mb + Just x -> return (Just x) + + -- Check if the guard value has changed + checkValueChange cachedKey + | not (fileMonitorKeyValid currentKey cachedKey) + = return (Just (MonitoredValueChanged cachedKey)) + | otherwise + = return Nothing + + -- Check if any file has changed + checkFileChange cachedFileStatus cachedKey cachedResult = do + res <- probeFileSystem root cachedFileStatus + case res of + -- Some monitored file has changed + Left changedPath -> + return (Just (MonitoredFileChanged (normalise changedPath))) + + -- No monitored file has changed + Right (cachedFileStatus', cacheStatus) -> do + + -- But we might still want to update the cache + whenCacheChanged cacheStatus $ + rewriteCacheFile monitor cachedFileStatus' cachedKey cachedResult + + return Nothing + +-- | Helper for reading the cache file. +-- +-- This determines the type and format of the binary cache file. +-- +readCacheFile :: (Binary a, Binary b) + => FileMonitor a b + -> IO (Either String (MonitorStateFileSet, a, b)) +readCacheFile FileMonitor {fileMonitorCacheFile} = + withBinaryFile fileMonitorCacheFile ReadMode $ \hnd -> + Binary.decodeOrFailIO =<< BS.hGetContents hnd + +-- | Helper for writing the cache file. +-- +-- This determines the type and format of the binary cache file. +-- +rewriteCacheFile :: (Binary a, Binary b) + => FileMonitor a b + -> MonitorStateFileSet -> a -> b -> IO () +rewriteCacheFile FileMonitor {fileMonitorCacheFile} fileset key result = + writeFileAtomic fileMonitorCacheFile $ + Binary.encode (fileset, key, result) + +-- | Probe the file system to see if any of the monitored files have changed. +-- +-- It returns Nothing if any file changed, or returns a possibly updated +-- file 'MonitorStateFileSet' plus an indicator of whether it actually changed. +-- +-- We may need to update the cache since there may be changes in the filesystem +-- state which don't change any of our affected files. +-- +-- Consider the glob @{proj1,proj2}\/\*.cabal@. Say we first run and find a +-- @proj1@ directory containing @proj1.cabal@ yet no @proj2@. If we later run +-- and find @proj2@ was created, yet contains no files matching @*.cabal@ then +-- we want to update the cache despite no changes in our relevant file set. +-- Specifically, we should add an mtime for this directory so we can avoid +-- re-traversing the directory in future runs. +-- +probeFileSystem :: FilePath -> MonitorStateFileSet + -> IO (Either FilePath (MonitorStateFileSet, CacheChanged)) +probeFileSystem root (MonitorStateFileSet singlePaths globPaths) = + runChangedM $ + MonitorStateFileSet + <$> traverseWithKey (probeFileStatus root) singlePaths + <*> traverse (probeGlobStatus root ".") globPaths + +traverseWithKey :: (Applicative t, Eq k) + => (k -> a -> t b) -> Map k a -> t (Map k b) +#if MIN_VERSION_containers(0,5,0) +traverseWithKey = Map.traverseWithKey +#else +traverseWithKey f = fmap Map.fromAscList + . traverse (\(k, v) -> (,) k <$> f k v) + . Map.toAscList +#endif + + +----------------------------------------------- +-- Monad for checking for file system changes +-- +-- We need to be able to bail out if we detect a change (using ExceptT), +-- but if there's no change we need to be able to rebuild the monitor +-- state. And we want to optimise that rebuilding by keeping track if +-- anything actually changed (using StateT), so that in the typical case +-- we can avoid rewriting the state file. + +newtype ChangedM a = ChangedM (StateT CacheChanged (ExceptT FilePath IO) a) + deriving (Functor, Applicative, Monad, MonadIO) + +runChangedM :: ChangedM a -> IO (Either FilePath (a, CacheChanged)) +runChangedM (ChangedM action) = + runExceptT $ State.runStateT action CacheUnchanged + +somethingChanged :: FilePath -> ChangedM a +somethingChanged path = ChangedM $ throwError path + +cacheChanged :: ChangedM () +cacheChanged = ChangedM $ State.put CacheChanged + +data CacheChanged = CacheChanged | CacheUnchanged + +whenCacheChanged :: Monad m => CacheChanged -> m () -> m () +whenCacheChanged CacheChanged action = action +whenCacheChanged CacheUnchanged _ = return () + +---------------------- + +-- | Probe the file system to see if a single monitored file has changed. +-- +probeFileStatus :: FilePath -> FilePath -> MonitorStateFile + -> ChangedM MonitorStateFile +probeFileStatus root file cached = do + case cached of + MonitorStateFile mtime -> probeFileModificationTime + root file mtime + MonitorStateFileHashed mtime hash -> probeFileModificationTimeAndHash + root file mtime hash + MonitorStateFileNonExistent -> probeFileNonExistence root file + MonitorStateFileGone -> somethingChanged file + MonitorStateFileHashGone -> somethingChanged file + + return cached + + +-- | Probe the file system to see if a monitored file glob has changed. +-- +probeGlobStatus :: FilePath -- ^ root path + -> FilePath -- ^ path of the directory we are looking in + -- relative to @root@ + -> MonitorStateGlob + -> ChangedM MonitorStateGlob +probeGlobStatus root dirName + (MonitorStateGlobDirs glob globPath mtime children) = do + change <- liftIO $ checkDirectoryModificationTime (root dirName) mtime + case change of + Nothing -> do + children' <- sequence + [ do fstate' <- probeGlobStatus root (dirName fname) fstate + return (fname, fstate') + | (fname, fstate) <- children ] + return $! MonitorStateGlobDirs glob globPath mtime children' + + Just mtime' -> do + -- directory modification time changed: + -- a matching subdir may have been added or deleted + matches <- filterM (\entry -> let subdir = root dirName entry + in liftIO $ doesDirectoryExist subdir) + . filter (globMatches glob) + =<< liftIO (getDirectoryContents (root dirName)) + + children' <- mapM probeMergeResult $ + mergeBy (\(path1,_) path2 -> compare path1 path2) + children + (sort matches) + return $! MonitorStateGlobDirs glob globPath mtime' children' + -- Note that just because the directory has changed, we don't force + -- a cache rewrite with 'cacheChanged' since that has some cost, and + -- all we're saving is scanning the directory. But we do rebuild the + -- cache with the new mtime', so that if the cache is rewritten for + -- some other reason, we'll take advantage of that. + + where + probeMergeResult :: MergeResult (FilePath, MonitorStateGlob) FilePath + -> ChangedM (FilePath, MonitorStateGlob) + + -- Only in cached (directory deleted) + probeMergeResult (OnlyInLeft (path, fstate)) = + case allMatchingFiles (dirName path) fstate of + [] -> return (path, fstate) + -- Strictly speaking we should be returning 'CacheChanged' above + -- as we should prune the now-missing 'MonitorStateGlob'. However + -- we currently just leave these now-redundant entries in the + -- cache as they cost no IO and keeping them allows us to avoid + -- rewriting the cache. + (file:_) -> somethingChanged file + + -- Only in current filesystem state (directory added) + probeMergeResult (OnlyInRight path) = do + fstate <- liftIO $ buildMonitorStateGlob + root (dirName path) globPath + case allMatchingFiles (dirName path) fstate of + (file:_) -> somethingChanged file + -- This is the only case where we use 'cacheChanged' because we can + -- have a whole new dir subtree (of unbounded size and cost), so we + -- need to save the state of that new subtree in the cache. + [] -> cacheChanged >> return (path, fstate) + + -- Found in path + probeMergeResult (InBoth (path, fstate) _) = do + fstate' <- probeGlobStatus root (dirName path) fstate + return (path, fstate') + + -- | Does a 'MonitorStateGlob' have any relevant files within it? + allMatchingFiles :: FilePath -> MonitorStateGlob -> [FilePath] + allMatchingFiles dir (MonitorStateGlobFiles _ _ entries) = + [ dir fname | (fname, _, _) <- entries ] + allMatchingFiles dir (MonitorStateGlobDirs _ _ _ entries) = + [ res + | (subdir, fstate) <- entries + , res <- allMatchingFiles (dir subdir) fstate ] + + +probeGlobStatus root dirName (MonitorStateGlobFiles glob mtime children) = do + change <- liftIO $ checkDirectoryModificationTime (root dirName) mtime + mtime' <- case change of + Nothing -> return mtime + Just mtime' -> do + -- directory modification time changed: + -- a matching file may have been added or deleted + matches <- filterM (\entry -> let file = root dirName entry + in liftIO $ doesFileExist file) + . filter (globMatches glob) + =<< liftIO (getDirectoryContents (root dirName)) + + mapM_ probeMergeResult $ + mergeBy (\(path1,_,_) path2 -> compare path1 path2) + children + (sort matches) + return mtime' + + -- Check that none of the children have changed + forM_ children $ \(file, fmtime, fhash) -> + probeFileModificationTimeAndHash root (dirName file) fmtime fhash + + return (MonitorStateGlobFiles glob mtime' children) + -- Again, we don't force a cache rewite with 'cacheChanged', but we do use + -- the new mtime' if any. + where + probeMergeResult :: MergeResult (FilePath, ModTime, Hash) FilePath + -> ChangedM () + probeMergeResult mr = case mr of + InBoth _ _ -> return () + -- this is just to be able to accurately report which file changed: + OnlyInLeft (path, _, _) -> somethingChanged (dirName path) + OnlyInRight path -> somethingChanged (dirName path) + +------------------------------------------------------------------------------ + +-- | Update the input value and the set of files monitored by the +-- 'FileMonitor', plus the cached value that may be returned in future. +-- +-- This takes a snapshot of the state of the monitored files right now, so +-- 'checkFileMonitorChanged' will look for file system changes relative to +-- this snapshot. So consider carefully when is the appropriate point to take +-- the snapshot. +-- +-- This is typically done once the action has been completed successfully and +-- we have the action's result and we know what files it looked at. See +-- 'FileMonitor' for a full explanation. +-- +updateFileMonitor + :: (Binary a, Binary b) + => FileMonitor a b -- ^ cache file path + -> FilePath -- ^ root directory + -> [MonitorFilePath] -- ^ files of interest relative to root + -> a -- ^ the current key value + -> b -- ^ the current result value + -> IO () +updateFileMonitor monitor root monitorFiles cachedKey cachedResult = do + fsc <- buildMonitorStateFileSet root monitorFiles + rewriteCacheFile monitor fsc cachedKey cachedResult + +-- | Take the snapshot of the monitored files. That is, given the +-- specification of the set of files we need to monitor, inspect the state +-- of the file system now and collect the information we'll need later to +-- determine if anything has changed. +-- +buildMonitorStateFileSet :: FilePath -- ^ root directory + -> [MonitorFilePath] -- ^ patterns of interest + -- relative to root + -> IO MonitorStateFileSet +buildMonitorStateFileSet root = + go Map.empty [] + where + go :: Map FilePath MonitorStateFile -> [MonitorStateGlob] + -> [MonitorFilePath] -> IO MonitorStateFileSet + go !singlePaths !globPaths [] = + return (MonitorStateFileSet singlePaths globPaths) + + go !singlePaths !globPaths (MonitorFile path : monitors) = do + let file = root path + monitorState <- handleDoesNotExist MonitorStateFileGone $ + MonitorStateFile <$> getModificationTime file + let singlePaths' = Map.insert path monitorState singlePaths + go singlePaths' globPaths monitors + + go !singlePaths !globPaths (MonitorFileHashed path : monitors) = do + let file = root path + monitorState <- handleDoesNotExist MonitorStateFileHashGone $ + MonitorStateFileHashed + <$> getModificationTime file + <*> readFileHash file + let singlePaths' = Map.insert path monitorState singlePaths + go singlePaths' globPaths monitors + + go !singlePaths !globPaths (MonitorNonExistentFile path : monitors) = do + let singlePaths' = Map.insert path MonitorStateFileNonExistent singlePaths + go singlePaths' globPaths monitors + + go !singlePaths !globPaths (MonitorFileGlob globPath : monitors) = do + monitorState <- buildMonitorStateGlob root "." globPath + go singlePaths (monitorState : globPaths) monitors + + +-- | Much like 'buildMonitorStateFileSet' but for the somewhat complicated case +-- of a file glob. +-- +-- This gets used both by 'buildMonitorStateFileSet' when we're taking the +-- file system snapshot, but also by 'probeGlobStatus' as part of checking +-- the monitored (globed) files for changes when we find a whole new subtree. +-- +buildMonitorStateGlob :: FilePath -- ^ the root directory + -> FilePath -- ^ directory we are examining + -- relative to the root + -> FilePathGlob -- ^ the matching glob + -> IO MonitorStateGlob +buildMonitorStateGlob root dir globPath = do + dirEntries <- getDirectoryContents (root dir) + dirMTime <- getModificationTime (root dir) + case globPath of + GlobDir glob globPath' -> do + subdirs <- filterM (\subdir -> doesDirectoryExist + (root dir subdir)) + $ filter (globMatches glob) dirEntries + subdirStates <- + forM (sort subdirs) $ \subdir -> do + fstate <- buildMonitorStateGlob root (dir subdir) globPath' + return (subdir, fstate) + return $! MonitorStateGlobDirs glob globPath' dirMTime subdirStates + + GlobFile glob -> do + files <- filterM (\fname -> doesFileExist (root dir fname)) + $ filter (globMatches glob) dirEntries + filesStates <- + forM (sort files) $ \file -> do + let path = root dir file + mtime <- getModificationTime path + hash <- readFileHash path + return (file, mtime, hash) + return $! MonitorStateGlobFiles glob dirMTime filesStates + +-- | Utility to match a file glob against the file system, starting from a +-- given root directory. The results are all relative to the given root. +-- +matchFileGlob :: FilePath -> FilePathGlob -> IO [FilePath] +matchFileGlob root glob0 = go glob0 "" + where + go (GlobFile glob) dir = do + entries <- getDirectoryContents (root dir) + let files = filter (globMatches glob) entries + return (map (dir ) files) + + go (GlobDir glob globPath) dir = do + entries <- getDirectoryContents (root dir) + subdirs <- filterM (\subdir -> doesDirectoryExist + (root dir subdir)) + $ filter (globMatches glob) entries + concat <$> mapM (\subdir -> go globPath (dir subdir)) subdirs +--TODO: [code cleanup] plausibly FilePathGlob and matchFileGlob should be +-- moved into D.C.Glob and/or merged with similar functionality in Cabal. + +------------------------------------------------------------------------------ +-- Utils +-- + +-- | Within the @root@ directory, check if @file@ has its 'ModTime' is +-- the same as @mtime@, short-circuiting if it is different. +probeFileModificationTime :: FilePath -> FilePath -> ModTime -> ChangedM () +probeFileModificationTime root file mtime = do + unchanged <- liftIO $ checkModificationTimeUnchanged root file mtime + unless unchanged (somethingChanged file) + +-- | Within the @root@ directory, check if @file@ has its 'ModTime' and +-- 'Hash' is the same as @mtime@ and @hash@, short-circuiting if it is +-- different. +probeFileModificationTimeAndHash :: FilePath -> FilePath -> ModTime -> Hash + -> ChangedM () +probeFileModificationTimeAndHash root file mtime hash = do + unchanged <- liftIO $ + checkFileModificationTimeAndHashUnchanged root file mtime hash + unless unchanged (somethingChanged file) + +-- | Within the @root@ directory, check if @file@ still does not exist. +-- If it *does* exist, short-circuit. +probeFileNonExistence :: FilePath -> FilePath -> ChangedM () +probeFileNonExistence root file = do + exists <- liftIO $ doesFileExist (root file) + when exists (somethingChanged file) + +-- | Returns @True@ if, inside the @root@ directory, @file@ has the same +-- 'ModTime' as @mtime@. +checkModificationTimeUnchanged :: FilePath -> FilePath + -> ModTime -> IO Bool +checkModificationTimeUnchanged root file mtime = + handleDoesNotExist False $ do + mtime' <- getModificationTime (root file) + return (mtime == mtime') + +-- | Returns @True@ if, inside the @root@ directory, @file@ has the +-- same 'ModTime' and 'Hash' as @mtime and @chash@. +checkFileModificationTimeAndHashUnchanged :: FilePath -> FilePath + -> ModTime -> Hash -> IO Bool +checkFileModificationTimeAndHashUnchanged root file mtime chash = + handleDoesNotExist False $ do + mtime' <- getModificationTime (root file) + if mtime == mtime' + then return True + else do + chash' <- readFileHash (root file) + return (chash == chash') + +-- | Read a non-cryptographic hash of a @file@. +readFileHash :: FilePath -> IO Hash +readFileHash file = + withBinaryFile file ReadMode $ \hnd -> + evaluate . Hashable.hash =<< BS.hGetContents hnd + +-- | Given a directory @dir@, return @Nothing@ if its 'ModTime' +-- is the same as @mtime@, and the new 'ModTime' if it is not. +checkDirectoryModificationTime :: FilePath -> ModTime -> IO (Maybe ModTime) +checkDirectoryModificationTime dir mtime = + handleDoesNotExist Nothing $ do + mtime' <- getModificationTime dir + if mtime == mtime' + then return Nothing + else return (Just mtime') + +-- | Run an IO computation, returning @e@ if it raises a "file +-- does not exist" error. +handleDoesNotExist :: a -> IO a -> IO a +handleDoesNotExist e = + handleJust + (\ioe -> if isDoesNotExistError ioe then Just ioe else Nothing) + (\_ -> return e) + +-- | Run an IO computation, returning @e@ if there is an 'error' +-- call. ('ErrorCall') +handleErrorCall :: a -> IO a -> IO a +handleErrorCall e = + handle (\(ErrorCall _) -> return e) + +------------------------------------------------------------------------------ +-- Instances +-- + +instance Text FilePathGlob where + disp (GlobDir glob pathglob) = disp glob Disp.<> Disp.char '/' + Disp.<> disp pathglob + disp (GlobFile glob) = disp glob + + parse = parse >>= \glob -> (asDir glob <++ asFile glob) + where + asDir glob = do _ <- ReadP.char '/' + globs <- parse + return (GlobDir glob globs) + asFile glob = return (GlobFile glob) + +#if MIN_VERSION_directory(1,2,0) +instance Binary UTCTime where + put (UTCTime (ModifiedJulianDay day) tod) = do + put day + put (toRational tod) + get = do + day <- get + tod <- get + return $! UTCTime (ModifiedJulianDay day) + (fromRational tod) +#else +instance Binary ClockTime where + put (TOD sec subsec) = do + put sec + put subsec + get = do + !sec <- get + !subsec <- get + return (TOD sec subsec) +#endif + +instance Binary MonitorStateFileSet where + put (MonitorStateFileSet singlePaths globPaths) = do + put (1 :: Int) -- version + put singlePaths + put globPaths + get = do + ver <- get + if ver == (1 :: Int) + then do singlePaths <- get + globPaths <- get + return $! MonitorStateFileSet singlePaths globPaths + else fail "MonitorStateFileSet: wrong version" + diff --git a/cabal-install/Distribution/Client/Glob.hs b/cabal-install/Distribution/Client/Glob.hs new file mode 100644 index 00000000000..60419d8c18c --- /dev/null +++ b/cabal-install/Distribution/Client/Glob.hs @@ -0,0 +1,106 @@ +{-# LANGUAGE DeriveGeneric #-} + +module Distribution.Client.Glob + ( GlobAtom(..) + , Glob (..) + , globMatches + ) where + +import Data.List (stripPrefix) +import Control.Monad (liftM2) +import Distribution.Compat.Binary +import GHC.Generics (Generic) + +import Distribution.Text +import Distribution.Compat.ReadP +import qualified Text.PrettyPrint as Disp + + +-- | A piece of a globbing pattern +data GlobAtom = WildCard + | Literal String + | Union [Glob] + deriving (Eq, Show, Generic) + +instance Binary GlobAtom + +-- | A single directory or file component of a globbed path +newtype Glob = Glob [GlobAtom] + deriving (Eq, Show, Generic) + +instance Binary Glob + + +-- | Test whether a file path component matches a globbing pattern +-- +globMatches :: Glob -> String -> Bool +globMatches (Glob atoms) = goStart atoms + where + -- From the man page, glob(7): + -- "If a filename starts with a '.', this character must be + -- matched explicitly." + + go, goStart :: [GlobAtom] -> String -> Bool + + goStart (WildCard:_) ('.':_) = False + goStart (Union globs:rest) cs = any (\(Glob glob) -> + goStart (glob ++ rest) cs) globs + goStart rest cs = go rest cs + + go [] "" = True + go (Literal lit:rest) cs + | Just cs' <- stripPrefix lit cs + = go rest cs' + | otherwise = False + go [WildCard] "" = True + go (WildCard:rest) (c:cs) = go rest (c:cs) || go (WildCard:rest) cs + go (Union globs:rest) cs = any (\(Glob glob) -> + go (glob ++ rest) cs) globs + go [] (_:_) = False + go (_:_) "" = False + +instance Text Glob where + disp (Glob atoms) = Disp.hcat (map dispAtom atoms) + where + dispAtom WildCard = Disp.char '*' + dispAtom (Literal str) = Disp.text (escape str) + dispAtom (Union globs) = Disp.braces + (Disp.hcat (Disp.punctuate (Disp.char ',') + (map disp globs))) + + escape [] = [] + escape (c:cs) + | isGlobEscapedChar c = '\\' : c : escape cs + | otherwise = c : escape cs + + parse = Glob `fmap` many1 globAtom + where + globAtom :: ReadP r GlobAtom + globAtom = literal +++ wildcard +++ union + + wildcard = char '*' >> return WildCard + + union = between (char '{') (char '}') + (fmap (Union . map Glob) $ sepBy1 (many1 globAtom) (char ',')) + + literal = Literal `fmap` many1' + where + litchar = normal +++ escape + + normal = satisfy (not . isGlobEscapedChar) + escape = char '\\' >> satisfy isGlobEscapedChar + + many1' :: ReadP r [Char] + many1' = liftM2 (:) litchar many' + + many' :: ReadP r [Char] + many' = many1' <++ return [] + +isGlobEscapedChar :: Char -> Bool +isGlobEscapedChar '*' = True +isGlobEscapedChar '{' = True +isGlobEscapedChar '}' = True +isGlobEscapedChar ',' = True +isGlobEscapedChar '\\' = True +isGlobEscapedChar '/' = True +isGlobEscapedChar _ = False \ No newline at end of file diff --git a/cabal-install/Distribution/Client/RebuildMonad.hs b/cabal-install/Distribution/Client/RebuildMonad.hs new file mode 100644 index 00000000000..edea8984c86 --- /dev/null +++ b/cabal-install/Distribution/Client/RebuildMonad.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +-- | An abstraction for re-running actions if values or files have changed. +-- +-- This is not a full-blown make-style incremental build system, it's a bit +-- more ad-hoc than that, but it's easier to integrate with existing code. +-- +-- It's a convenient interface to the "Distribution.Client.FileMonitor" +-- functions. +-- +module Distribution.Client.RebuildMonad ( + -- * Rebuild monad + Rebuild, + runRebuild, + + -- * Setting up file monitoring + monitorFiles, + MonitorFilePath(..), + monitorFileSearchPath, + FilePathGlob(..), + + -- * Using a file monitor + FileMonitor(..), + newFileMonitor, + rerunIfChanged, + + -- * Utils + matchFileGlob, + ) where + +import Distribution.Client.FileMonitor + ( MonitorFilePath(..), monitorFileSearchPath + , FilePathGlob(..), matchFileGlob + , FileMonitor(..), newFileMonitor + , MonitorChanged(..), MonitorChangedReason(..) + , checkFileMonitorChanged, updateFileMonitor ) + +import Distribution.Simple.Utils (debug) +import Distribution.Verbosity (Verbosity) + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif +import Control.Monad.State as State +import Distribution.Compat.Binary (Binary) +import System.FilePath (takeFileName) + + +-- | A monad layered on top of 'IO' to help with re-running actions when the +-- input files and values they depend on change. The crucial operations are +-- 'rerunIfChanged' and 'monitorFiles'. +-- +newtype Rebuild a = Rebuild (StateT [MonitorFilePath] IO a) + deriving (Functor, Applicative, Monad, MonadIO) + +-- | Use this wihin the body action of 'rerunIfChanged' to declare that the +-- action depends on the given files. This can be based on what the action +-- actually did. It is these files that will be checked for changes next +-- time 'rerunIfChanged' is called for that 'FileMonitor'. +-- +monitorFiles :: [MonitorFilePath] -> Rebuild () +monitorFiles filespecs = Rebuild (State.modify (filespecs++)) + +-- | Run a 'Rebuild' IO action. +unRebuild :: Rebuild a -> IO (a, [MonitorFilePath]) +unRebuild (Rebuild action) = runStateT action [] + +-- | Run a 'Rebuild' IO action. +runRebuild :: Rebuild a -> IO a +runRebuild (Rebuild action) = evalStateT action [] + +-- | This captures the standard use pattern for a 'FileMonitor': given a +-- monitor, an action and the input value the action depends on, either +-- re-run the action to get its output, or if the value and files the action +-- depends on have not changed then return a previously cached action result. +-- +-- The result is still in the 'Rebuild' monad, so these can be nested. +-- +-- Do not share 'FileMonitor's between different uses of 'rerunIfChanged'. +-- +rerunIfChanged :: (Eq a, Binary a, Binary b) + => Verbosity + -> FilePath + -> FileMonitor a b + -> a + -> Rebuild b + -> Rebuild b +rerunIfChanged verbosity rootDir monitor key action = do + changed <- liftIO $ checkFileMonitorChanged monitor rootDir key + case changed of + MonitorUnchanged result files -> do + liftIO $ debug verbosity $ "File monitor '" ++ monitorName + ++ "' unchanged." + monitorFiles files + return result + + MonitorChanged reason -> do + liftIO $ debug verbosity $ "File monitor '" ++ monitorName + ++ "' changed: " ++ showReason reason + (result, files) <- liftIO $ unRebuild action + liftIO $ updateFileMonitor monitor rootDir files key result + monitorFiles files + return result + where + monitorName = takeFileName (fileMonitorCacheFile monitor) + + showReason (MonitoredFileChanged file) = "file " ++ file + showReason (MonitoredValueChanged _) = "monitor value changed" + showReason MonitorFirstRun = "first run" + showReason MonitorCorruptCache = "invalid cache file" + diff --git a/cabal-install/Main.hs b/cabal-install/Main.hs index 872ffd0fc65..fe70293877c 100644 --- a/cabal-install/Main.hs +++ b/cabal-install/Main.hs @@ -66,6 +66,10 @@ import Distribution.Client.Targets import qualified Distribution.Client.List as List ( list, info ) +--TODO: temporary import, just to force these modules to be built. +-- It will be replaced by import of new build command once merged. +import Distribution.Client.RebuildMonad () + import Distribution.Client.Install (install) import Distribution.Client.Configure (configure) import Distribution.Client.Update (update) diff --git a/cabal-install/cabal-install.cabal b/cabal-install/cabal-install.cabal index a45cd0d6265..cd9faf5bc61 100644 --- a/cabal-install/cabal-install.cabal +++ b/cabal-install/cabal-install.cabal @@ -143,8 +143,10 @@ executable cabal Distribution.Client.Exec Distribution.Client.Fetch Distribution.Client.FetchUtils + Distribution.Client.FileMonitor Distribution.Client.Freeze Distribution.Client.Get + Distribution.Client.Glob Distribution.Client.GlobalFlags Distribution.Client.GZipUtils Distribution.Client.Haddock @@ -165,6 +167,7 @@ executable cabal Distribution.Client.ParseUtils Distribution.Client.PlanIndex Distribution.Client.Run + Distribution.Client.RebuildMonad Distribution.Client.Sandbox Distribution.Client.Sandbox.Index Distribution.Client.Sandbox.PackageEnvironment @@ -200,6 +203,7 @@ executable cabal Cabal >= 1.23.1 && < 1.24, containers >= 0.4 && < 0.6, filepath >= 1.3 && < 1.5, + hashable >= 1.0 && < 2, HTTP >= 4000.1.5 && < 4000.4, mtl >= 2.0 && < 3, pretty >= 1.1 && < 1.2, From 1e1d82b8e865d12a5fecb6888de158582901f96b Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sun, 7 Feb 2016 04:58:37 +0000 Subject: [PATCH 3/3] Add FileMonitor unit tests --- .../Distribution/Client/FileMonitor.hs | 4 +- cabal-install/cabal-install.cabal | 2 + cabal-install/tests/UnitTests.hs | 3 + .../Distribution/Client/FileMonitor.hs | 558 ++++++++++++++++++ 4 files changed, 565 insertions(+), 2 deletions(-) create mode 100644 cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs diff --git a/cabal-install/Distribution/Client/FileMonitor.hs b/cabal-install/Distribution/Client/FileMonitor.hs index 5389503e2cb..4715f849551 100644 --- a/cabal-install/Distribution/Client/FileMonitor.hs +++ b/cabal-install/Distribution/Client/FileMonitor.hs @@ -111,7 +111,7 @@ data MonitorFilePath = -- equivalent of MonitorFileHashed above. If we need globed files with -- only mtime then it's perfectly ok to add it. - deriving (Show, Generic) + deriving (Eq, Show, Generic) instance Binary MonitorFilePath @@ -334,7 +334,7 @@ data MonitorChangedReason a = -- that we cannot decode the values. This is completely benign as we can -- treat is just as if there were no cache file and re-run. | MonitorCorruptCache - deriving (Show, Functor) + deriving (Eq, Show, Functor) -- | Test if the input value or files monitored by the 'FileMonitor' have -- changed. If not, return the cached value. diff --git a/cabal-install/cabal-install.cabal b/cabal-install/cabal-install.cabal index cd9faf5bc61..24f266e9c04 100644 --- a/cabal-install/cabal-install.cabal +++ b/cabal-install/cabal-install.cabal @@ -259,6 +259,7 @@ Test-Suite unit-tests UnitTests.Distribution.Client.Dependency.Modular.PSQ UnitTests.Distribution.Client.Dependency.Modular.Solver UnitTests.Distribution.Client.Dependency.Modular.DSL + UnitTests.Distribution.Client.FileMonitor UnitTests.Distribution.Client.GZipUtils UnitTests.Distribution.Client.Sandbox UnitTests.Distribution.Client.Tar @@ -274,6 +275,7 @@ Test-Suite unit-tests process, directory, filepath, + hashable, stm, tar, time, diff --git a/cabal-install/tests/UnitTests.hs b/cabal-install/tests/UnitTests.hs index 0030760f207..1e3bfef1575 100644 --- a/cabal-install/tests/UnitTests.hs +++ b/cabal-install/tests/UnitTests.hs @@ -11,6 +11,7 @@ import qualified UnitTests.Distribution.Client.Targets import qualified UnitTests.Distribution.Client.GZipUtils import qualified UnitTests.Distribution.Client.Dependency.Modular.PSQ import qualified UnitTests.Distribution.Client.Dependency.Modular.Solver +import qualified UnitTests.Distribution.Client.FileMonitor tests :: TestTree tests = testGroup "Unit Tests" [ @@ -28,6 +29,8 @@ tests = testGroup "Unit Tests" [ UnitTests.Distribution.Client.Dependency.Modular.PSQ.tests ,testGroup "UnitTests.Distribution.Client.Dependency.Modular.Solver" UnitTests.Distribution.Client.Dependency.Modular.Solver.tests + ,testGroup "UnitTests.Distribution.Client.FileMonitor" + UnitTests.Distribution.Client.FileMonitor.tests ] -- Extra options for running the test suite diff --git a/cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs b/cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs new file mode 100644 index 00000000000..09971e2bf2c --- /dev/null +++ b/cabal-install/tests/UnitTests/Distribution/Client/FileMonitor.hs @@ -0,0 +1,558 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module UnitTests.Distribution.Client.FileMonitor (tests) where + +import Control.Monad +import Control.Exception +import Control.Concurrent (threadDelay) +import qualified Data.Set as Set +import System.FilePath +import System.Directory + +import Distribution.Text (simpleParse) +import Distribution.Compat.Binary +import Distribution.Simple.Utils (withTempDirectory) +import Distribution.Verbosity (silent) + +import Distribution.Client.FileMonitor + +import Test.Tasty +import Test.Tasty.HUnit + + +tests :: [TestTree] +tests = + [ testCase "sanity check mtimes" testFileMTimeSanity + , testCase "no monitor cache" testNoMonitorCache + , testCase "corrupt monitor cache" testCorruptMonitorCache + , testCase "empty monitor" testEmptyMonitor + , testCase "missing file" testMissingFile + , testCase "change file" testChangedFile + , testCase "file mtime vs content" testChangedFileMtimeVsContent + , testCase "remove file" testRemoveFile + , testCase "non-existent file" testNonExistentFile + + , testGroup "glob matches" + [ testCase "no change" testGlobNoChange + , testCase "add match" testGlobAddMatch + , testCase "remove match" testGlobRemoveMatch + , testCase "change match" testGlobChangeMatch + + , testCase "add match subdir" testGlobAddMatchSubdir + , testCase "remove match subdir" testGlobRemoveMatchSubdir + , testCase "change match subdir" testGlobChangeMatchSubdir + + , testCase "add non-match" testGlobAddNonMatch + , testCase "remove non-match" testGlobRemoveNonMatch + + , testCase "add non-match" testGlobAddNonMatchSubdir + , testCase "remove non-match" testGlobRemoveNonMatchSubdir + + , testCase "invariant sorted 1" testInvariantMonitorStateGlobFiles + , testCase "invariant sorted 2" testInvariantMonitorStateGlobDirs + ] + + , testCase "value unchanged" testValueUnchanged + , testCase "value changed" testValueChanged + , testCase "value & file changed" testValueAndFileChanged + , testCase "value updated" testValueUpdated + ] + +-- we rely on file mtimes having a reasonable resolution +testFileMTimeSanity :: Assertion +testFileMTimeSanity = do + withTempDirectory silent "." "file-status-" $ \dir -> do + replicateM_ 10 $ do + writeFile (dir "a") "content" + t1 <- getModificationTime (dir "a") + threadDelayMTimeChange + writeFile (dir "a") "content" + t2 <- getModificationTime (dir "a") + assertBool "expected different file mtimes" (t2 > t1) + +-- first run, where we don't even call updateMonitor +testNoMonitorCache :: Assertion +testNoMonitorCache = + withFileMonitor $ \root monitor -> do + reason <- expectMonitorChanged root (monitor :: FileMonitor () ()) () + reason @?= MonitorFirstRun + +-- write garbage into the binary cache file +testCorruptMonitorCache :: Assertion +testCorruptMonitorCache = + withFileMonitor $ \root monitor -> do + writeFile (fileMonitorCacheFile monitor) "broken" + reason <- expectMonitorChanged root monitor () + reason @?= MonitorCorruptCache + + updateMonitor root monitor [] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [] + + writeFile (fileMonitorCacheFile monitor) "broken" + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitorCorruptCache + +-- no files to monitor +testEmptyMonitor :: Assertion +testEmptyMonitor = + withFileMonitor $ \root monitor -> do + touch root "a" + updateMonitor root monitor [] () () + touch root "b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [] + +-- monitor a file that is expected to exist +testMissingFile :: Assertion +testMissingFile = do + test MonitorFile "a" + test MonitorFileHashed "a" + test MonitorFile "dir/a" + test MonitorFileHashed "dir/a" + where + test monitorKind file = + withFileMonitor $ \root monitor -> do + -- a file that doesn't exist at snapshot time is considered to have + -- changed + updateMonitor root monitor [monitorKind file] () () + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged file + + -- a file doesn't exist at snapshot time, but gets added afterwards is + -- also considered to have changed + updateMonitor root monitor [monitorKind file] () () + touch root file + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitoredFileChanged file + + +testChangedFile :: Assertion +testChangedFile = do + test MonitorFile "a" + test MonitorFileHashed "a" + test MonitorFile "dir/a" + test MonitorFileHashed "dir/a" + where + test monitorKind file = + withFileMonitor $ \root monitor -> do + touch root file + updateMonitor root monitor [monitorKind file] () () + threadDelayMTimeChange + write root file "different" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged file + + +testChangedFileMtimeVsContent :: Assertion +testChangedFileMtimeVsContent = + withFileMonitor $ \root monitor -> do + -- if we don't touch the file, it's unchanged + touch root "a" + updateMonitor root monitor [MonitorFile "a"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [MonitorFile "a"] + + -- if we do touch the file, it's changed if we only consider mtime + updateMonitor root monitor [MonitorFile "a"] () () + threadDelayMTimeChange + touch root "a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "a" + + -- but if we touch the file, it's unchanged if we consider content hash + updateMonitor root monitor [MonitorFileHashed "a"] () () + threadDelayMTimeChange + touch root "a" + (res2, files2) <- expectMonitorUnchanged root monitor () + res2 @?= () + files2 @?= [MonitorFileHashed "a"] + + -- finally if we change the content it's changed + updateMonitor root monitor [MonitorFileHashed "a"] () () + threadDelayMTimeChange + write root "a" "different" + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitoredFileChanged "a" + + +testRemoveFile :: Assertion +testRemoveFile = do + test MonitorFile "a" + test MonitorFileHashed "a" + test MonitorFile "dir/a" + test MonitorFileHashed "dir/a" + where + test monitorKind file = + withFileMonitor $ \root monitor -> do + touch root file + updateMonitor root monitor [monitorKind file] () () + remove root file + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged file + + +-- monitor a file that we expect not to exist +testNonExistentFile :: Assertion +testNonExistentFile = + withFileMonitor $ \root monitor -> do + -- a file that doesn't exist at snapshot time or check time is unchanged + updateMonitor root monitor [MonitorNonExistentFile "a"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [MonitorNonExistentFile "a"] + + -- if the file then exists it has changed + touch root "a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "a" + + -- if the file then exists at snapshot and check time it has changed + updateMonitor root monitor [MonitorNonExistentFile "a"] () () + reason2 <- expectMonitorChanged root monitor () + reason2 @?= MonitoredFileChanged "a" + + -- but if the file existed at snapshot time and doesn't exist at check time + -- it is consider unchanged. This is unlike files we expect to exist, but + -- that's because files that exist can have different content and actions + -- can depend on that content, whereas if the action expected a file not to + -- exist and it now does not, it'll give the same result, irrespective of + -- the fact that the file might have existed in the meantime. + updateMonitor root monitor [MonitorNonExistentFile "a"] () () + remove root "a" + (res2, files2) <- expectMonitorUnchanged root monitor () + res2 @?= () + files2 @?= [MonitorNonExistentFile "a"] + + +------------------ +-- globs +-- + +testGlobNoChange :: Assertion +testGlobNoChange = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/good-b" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + +testGlobAddMatch :: Assertion +testGlobAddMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + + threadDelayMTimeChange + touch root "dir/good-b" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/good-b" + +testGlobRemoveMatch :: Assertion +testGlobRemoveMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/good-b" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + remove root "dir/good-a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/good-a" + +testGlobChangeMatch :: Assertion +testGlobChangeMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/good-b" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + threadDelayMTimeChange + touch root "dir/good-b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + + write root "dir/good-b" "different" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/good-b" + +testGlobAddMatchSubdir :: Assertion +testGlobAddMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + threadDelayMTimeChange + touch root "dir/b/good-b" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/b/good-b" + +testGlobRemoveMatchSubdir :: Assertion +testGlobRemoveMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + touch root "dir/b/good-b" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + removeDir root "dir/a" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/a/good-a" + +testGlobChangeMatchSubdir :: Assertion +testGlobChangeMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + touch root "dir/b/good-b" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + threadDelayMTimeChange + touch root "dir/b/good-b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/good-*"] + + write root "dir/b/good-b" "different" + reason <- expectMonitorChanged root monitor () + reason @?= MonitoredFileChanged "dir/b/good-b" + +testGlobAddNonMatch :: Assertion +testGlobAddNonMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + threadDelayMTimeChange + touch root "dir/bad" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + +testGlobRemoveNonMatch :: Assertion +testGlobRemoveNonMatch = + withFileMonitor $ \root monitor -> do + touch root "dir/good-a" + touch root "dir/bad" + updateMonitor root monitor [monitorFileGlob "dir/good-*"] () () + remove root "dir/bad" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/good-*"] + +testGlobAddNonMatchSubdir :: Assertion +testGlobAddNonMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + threadDelayMTimeChange + touch root "dir/b/bad" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/good-*"] + +testGlobRemoveNonMatchSubdir :: Assertion +testGlobRemoveNonMatchSubdir = + withFileMonitor $ \root monitor -> do + touch root "dir/a/good-a" + touch root "dir/b/bad" + updateMonitor root monitor [monitorFileGlob "dir/*/good-*"] () () + removeDir root "dir/b" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/good-*"] + + +-- try and tickle a bug that happens if we don't maintain the invariant that +-- MonitorStateGlobFiles entries are sorted +testInvariantMonitorStateGlobFiles :: Assertion +testInvariantMonitorStateGlobFiles = + withFileMonitor $ \root monitor -> do + touch root "dir/a" + touch root "dir/b" + touch root "dir/c" + touch root "dir/d" + updateMonitor root monitor [monitorFileGlob "dir/*"] () () + threadDelayMTimeChange + -- so there should be no change (since we're doing content checks) + -- but if we can get the dir entries to appear in the wrong order + -- then if the sorted invariant is not maintained then we can fool + -- the 'probeGlobStatus' into thinking there's changes + remove root "dir/a" + remove root "dir/b" + remove root "dir/c" + remove root "dir/d" + touch root "dir/d" + touch root "dir/c" + touch root "dir/b" + touch root "dir/a" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*"] + +-- same thing for the subdirs case +testInvariantMonitorStateGlobDirs :: Assertion +testInvariantMonitorStateGlobDirs = + withFileMonitor $ \root monitor -> do + touch root "dir/a/file" + touch root "dir/b/file" + touch root "dir/c/file" + touch root "dir/d/file" + updateMonitor root monitor [monitorFileGlob "dir/*/file"] () () + threadDelayMTimeChange + removeDir root "dir/a" + removeDir root "dir/b" + removeDir root "dir/c" + removeDir root "dir/d" + touch root "dir/d/file" + touch root "dir/c/file" + touch root "dir/b/file" + touch root "dir/a/file" + (res, files) <- expectMonitorUnchanged root monitor () + res @?= () + files @?= [monitorFileGlob "dir/*/file"] + + +------------------ +-- value changes +-- + +testValueUnchanged :: Assertion +testValueUnchanged = + withFileMonitor $ \root monitor -> do + touch root "a" + updateMonitor root monitor [MonitorFile "a"] (42 :: Int) "ok" + (res, files) <- expectMonitorUnchanged root monitor 42 + res @?= "ok" + files @?= [MonitorFile "a"] + +testValueChanged :: Assertion +testValueChanged = + withFileMonitor $ \root monitor -> do + touch root "a" + updateMonitor root monitor [MonitorFile "a"] (42 :: Int) "ok" + reason <- expectMonitorChanged root monitor 43 + reason @?= MonitoredValueChanged 42 + +testValueAndFileChanged :: Assertion +testValueAndFileChanged = + withFileMonitor $ \root monitor -> do + touch root "a" + + -- we change the value and the file, and the value change is reported + updateMonitor root monitor [MonitorFile "a"] (42 :: Int) "ok" + threadDelayMTimeChange + touch root "a" + reason <- expectMonitorChanged root monitor 43 + reason @?= MonitoredValueChanged 42 + + -- if fileMonitorCheckIfOnlyValueChanged then if only the value changed + -- then it's reported as MonitoredValueChanged + let monitor' :: FileMonitor Int String + monitor' = monitor { fileMonitorCheckIfOnlyValueChanged = True } + updateMonitor root monitor' [MonitorFile "a"] 42 "ok" + reason2 <- expectMonitorChanged root monitor' 43 + reason2 @?= MonitoredValueChanged 42 + + -- but if a file changed too then we don't report MonitoredValueChanged + updateMonitor root monitor' [MonitorFile "a"] 42 "ok" + threadDelayMTimeChange + touch root "a" + reason3 <- expectMonitorChanged root monitor' 43 + reason3 @?= MonitoredFileChanged "a" + +testValueUpdated :: Assertion +testValueUpdated = + withFileMonitor $ \root monitor -> do + touch root "a" + + let monitor' :: FileMonitor (Set.Set Int) String + monitor' = (monitor :: FileMonitor (Set.Set Int) String) { + fileMonitorCheckIfOnlyValueChanged = True, + fileMonitorKeyValid = Set.isSubsetOf + } + + updateMonitor root monitor' [MonitorFile "a"] (Set.fromList [42,43]) "ok" + (res,_files) <- expectMonitorUnchanged root monitor' (Set.fromList [42]) + res @?= "ok" + + reason <- expectMonitorChanged root monitor' (Set.fromList [42,44]) + reason @?= MonitoredValueChanged (Set.fromList [42,43]) + + +------------- +-- Utils + +newtype RootPath = RootPath FilePath + +write :: RootPath -> FilePath -> String -> IO () +write (RootPath root) fname contents = do + let path = root fname + createDirectoryIfMissing True (takeDirectory path) + writeFile path contents + +touch :: RootPath -> FilePath -> IO () +touch root fname = write root fname "hello" + +-- Wait a moment to ensure a file mtime change +threadDelayMTimeChange :: IO () +#if WIN32 || (MIN_VERSION_directory(1,2,1) && MIN_VERSION_unix(2,6,0)) +-- hi-res file times +threadDelayMTimeChange = threadDelay 10000 -- 10ms +#else +-- second-res file times +threadDelayMTimeChange = threadDelay 1000000 -- 1s +#endif + +remove :: RootPath -> FilePath -> IO () +remove (RootPath root) fname = removeFile (root fname) + +removeDir :: RootPath -> FilePath -> IO () +removeDir (RootPath root) dname = removeDirectoryRecursive (root dname) + +monitorFileGlob :: String -> MonitorFilePath +monitorFileGlob globstr + | Just glob <- simpleParse globstr = MonitorFileGlob glob + | otherwise = error $ "Failed to parse " ++ globstr + + +expectMonitorChanged :: (Binary a, Binary b) + => RootPath -> FileMonitor a b -> a + -> IO (MonitorChangedReason a) +expectMonitorChanged root monitor key = do + res <- checkChanged root monitor key + case res of + MonitorChanged reason -> return reason + MonitorUnchanged _ _ -> throwIO $ HUnitFailure "expected change" + +expectMonitorUnchanged :: (Binary a, Binary b) + => RootPath -> FileMonitor a b -> a + -> IO (b, [MonitorFilePath]) +expectMonitorUnchanged root monitor key = do + res <- checkChanged root monitor key + case res of + MonitorChanged _reason -> throwIO $ HUnitFailure "expected no change" + MonitorUnchanged b files -> return (b, files) + +checkChanged :: (Binary a, Binary b) + => RootPath -> FileMonitor a b + -> a -> IO (MonitorChanged a b) +checkChanged (RootPath root) monitor key = + checkFileMonitorChanged monitor root key + +updateMonitor :: (Binary a, Binary b) + => RootPath -> FileMonitor a b + -> [MonitorFilePath] -> a -> b -> IO () +updateMonitor (RootPath root) monitor files key result = + updateFileMonitor monitor root files key result + +withFileMonitor :: Eq a => (RootPath -> FileMonitor a b -> IO c) -> IO c +withFileMonitor action = do + withTempDirectory silent "." "file-status-" $ \root -> do + let monitorFile = root <.> "monitor" + monitor = newFileMonitor monitorFile + finally (action (RootPath root) monitor) $ do + exists <- doesFileExist monitorFile + when exists $ removeFile monitorFile +