// Utilities to help with incremental builds.
//
// There are four things we consider for each rule:
//  - the global config, any change to which invalidates artifacts
//    (it's too hard to work out which bits affect which rules)
//  - the rule definition itself (the command to run, etc)
//  - any input files it might have
//  - any dependencies.
//
// If all of those are the same as the last time the rule was run,
// we can safely assume that the output will be the same this time,
// so we don't have to run it again.

package build

import "bytes"
import "crypto/md5"
import "fmt"
import "hash"
import "io"
import "os"
import "path"
import "path/filepath"
import "sync"

import "core"


// needsBuilding reports whether the target has to be (re)built. It returns
// false only when every check passes: no relevant dependency has changed, the
// stored config, rule and source hashes match the current ones, and every
// declared output still exists on disk.
func needsBuilding(state *core.BuildState, target *core.BuildTarget) bool {
    // Dependency state is checked first because it needs no disk I/O.
    depsChanged := false
    if target.NeedsTransitiveDependencies {
        depsChanged = anyDependencyHasChanged(target)
    } else {
        for _, dep := range target.Dependencies {
            if dep.State < core.Unchanged {
                // A direct dependency has just been rebuilt, so this rule must be too.
                depsChanged = true
                break
            }
        }
    }
    if depsChanged {
        return true
    }

    // Compare the hashes recorded by the previous build with the current ones.
    // A missing or unreadable hash file yields empty hashes, which never match.
    storedRule, storedConfig, storedSource := readRuleHashFile(ruleHashFileName(target))
    if !bytes.Equal(storedConfig, state.ConfigHash) {
        return true
    }
    if !bytes.Equal(storedRule, ruleHash(target)) {
        return true
    }
    currentSource, err := sourceHash(state.Graph, target)
    if err != nil {
        return true // Couldn't hash the sources; rebuilding is the safe answer.
    }
    if !bytes.Equal(storedSource, currentSource) {
        return true
    }

    // Finally make sure the outputs are still there. They'd only be missing if
    // the user had removed them, but it's incredibly aggravating if you remove
    // an output and the rule won't rebuild itself.
    for _, output := range target.Outputs {
        realOutput := path.Join(target.OutDir(), output)
        if core.PathExists(realOutput) {
            continue
        }
        log.Debug("Output %s doesn't exist for rule %s; will rebuild.", realOutput, target.Label)
        return true
    }
    return false // Nothing has changed; the existing outputs are fine.
}

// anyDependencyHasChanged returns true if any transitive dependency of this
// target has changed (i.e. its State is below core.Unchanged).
//
// The walk starts at the target and always descends into the target's own
// dependencies. Below that, it stops descending at any dependency whose
// OutputIsComplete flag is set; that dependency's own state is still checked,
// but the things it depends on are not. Each label is visited at most once.
func anyDependencyHasChanged(target *core.BuildTarget) bool {
    done := map[core.BuildLabel]bool{}
    var inner func(*core.BuildTarget) bool
    inner = func(dependency *core.BuildTarget) bool {
        done[dependency.Label] = true
        if dependency != target {
            if dependency.State < core.Unchanged {
                return true
            }
            if dependency.OutputIsComplete {
                return false // No need to look beneath this dependency.
            }
        }
        // The target itself is always expanded, even when its own
        // OutputIsComplete flag is set: that flag only limits traversal through
        // a dependency, it doesn't mean the target has no dependencies to check.
        // This matches how iterSources treats the target.
        for _, dep := range dependency.Dependencies {
            if !done[dep.Label] && inner(dep) {
                return true
            }
        }
        return false
    }
    return inner(target)
}

// sourceHash calculates a single MD5 hash over all sources of this rule, by
// hashing each source yielded by iterSources (non-transitively) with pathHash
// and feeding those hashes, in order, into one digest.
//
// If any source cannot be hashed, the first such error is returned and the
// returned hash is nil.
func sourceHash(graph *core.BuildGraph, target *core.BuildTarget) ([]byte, error) {
    h := md5.New()
    var firstErr error
    for source := range iterSources(graph, target, false) {
        if firstErr != nil {
            // Keep receiving after a failure: iterSources feeds an unbuffered
            // channel from a goroutine, which would block forever (and leak)
            // if we stopped reading before it had finished.
            continue
        }
        result, err := pathHash(source.src)
        if err != nil {
            firstErr = err
            continue
        }
        h.Write(result)
    }
    if firstErr != nil {
        return nil, firstErr
    }
    return h.Sum(nil), nil
}

// pathHashMemoizer memoizes the results of pathHash so we don't hash the same
// files multiple times. It is accessed concurrently, so every read and write
// must happen under pathHashMutex.
var pathHashMemoizer = map[string][]byte{}
var pathHashMutex sync.RWMutex

// pathHash calculates the hash of a single path, which might be a file or a
// directory. This is the memoized form that only hashes each path once:
// successful results are stored in pathHashMemoizer and returned directly on
// later calls. Failures are never cached, so a path that could not be hashed
// is retried on the next call. On error the returned hash is nil.
func pathHash(path string) ([]byte, error) {
    // Fast path: a shared read lock is enough for a cache hit.
    pathHashMutex.RLock()
    cached, present := pathHashMemoizer[path]
    pathHashMutex.RUnlock()
    if present {
        return cached, nil
    }

    pathHashMutex.Lock()
    defer pathHashMutex.Unlock()
    // Another goroutine may have hashed this path while we were waiting for
    // the write lock; check again before doing the work a second time.
    if cached, present := pathHashMemoizer[path]; present {
        return cached, nil
    }
    result, err := pathHashImpl(path)
    if err != nil {
        return nil, err
    }
    pathHashMemoizer[path] = result
    return result, nil
}

// pathHashImpl calculates the MD5 hash of a path without any memoization.
//
// If the path is a directory, it is walked with filepath.Walk and the contents
// of every non-directory entry are fed into a single digest in walk order.
// Otherwise the path is hashed as a single file.
//
// Any error from walking, opening or reading is returned. In that case the
// returned hash covers only whatever was read before the failure and should
// not be relied upon.
func pathHashImpl(path string) ([]byte, error) {
    h := md5.New()
    // Deliberately not declared in the if statement: doing so would shadow err
    // in both branches and the errors assigned below would be silently lost.
    info, err := os.Stat(path)
    if err == nil && info.IsDir() {
        err = filepath.Walk(path, func(walked string, walkedInfo os.FileInfo, walkErr error) error {
            if walkErr != nil {
                return walkErr
            }
            if walkedInfo.IsDir() {
                return nil // Only file contents contribute to the hash.
            }
            return fileHash(&h, walked)
        })
    } else {
        // Either a plain file or Stat failed; let fileHash report any error
        // (e.g. the path not existing) when it tries to open the path.
        err = fileHash(&h, path)
    }
    return h.Sum(nil), err
}

// fileHash feeds the entire contents of the named file into the given hash.
// It returns any error from opening or reading the file; the hash is left
// untouched if the file cannot be opened.
func fileHash(h *hash.Hash, filename string) error {
    f, openErr := os.Open(filename)
    if openErr != nil {
        return openErr
    }
    defer f.Close()
    // Stream the file into the digest rather than loading it all into memory.
    _, copyErr := io.Copy(*h, f)
    return copyErr
}

// ruleHash computes an MD5 hash over the parts of a rule definition that can
// affect its outputs. Only declared fields are hashed (pointers and the like
// obviously can't be), so any new field added to the struct that influences
// the build needs to be added here as well.
//
// The fields are written in a fixed order; changing that order changes every
// hash.
func ruleHash(target *core.BuildTarget) []byte {
    digest := md5.New()

    // Identity and declared relationships.
    digest.Write([]byte(target.Label.String()))
    for _, declared := range target.DeclaredDependencies {
        digest.Write([]byte(declared.String()))
    }
    // Visibility doesn't strictly affect the output, but best to be safe.
    for _, visible := range target.Visibility {
        digest.Write([]byte(visible.String()))
    }

    // Kind of rule, and what it runs.
    hashBool(digest, target.IsBinary)
    hashBool(digest, target.IsTest)
    digest.Write([]byte(target.Command))
    digest.Write([]byte(target.TestCommand))
    digest.Write([]byte{byte(target.TestResultFormat)})

    // How dependencies are treated.
    hashBool(digest, target.NeedsTransitiveDependencies)
    hashBool(digest, target.OutputIsComplete)

    return digest.Sum(nil)
}

// hashBool writes a single byte representing b to the hash: 2 for true and
// 1 for false.
func hashBool(writer hash.Hash, b bool) {
    marker := byte(1)
    if b {
        marker = 2
    }
    writer.Write([]byte{marker})
}

// sourcePair is a value yielded by iterSources: the original location of a
// source (src) paired with its temporary location for the rule being built (tmp).
type sourcePair struct { src, tmp string }

// iterSources returns all the sources for a rule, allowing for sources that
// are other rules and rules that require transitive dependencies.
//
// For the target itself, its file sources are yielded (sources that are labels
// of other rules are skipped; they are picked up as dependencies instead). For
// every other rule visited, its outputs are yielded. Dependencies are only
// visited when includeTransitive is true: the target's own dependencies always
// are, and theirs in turn only if the target needs transitive dependencies and
// the intermediate rule's output is not complete. Each rule is visited at most
// once, so nothing is yielded twice when a dependency is reachable by several
// paths.
//
// The values are sent on an unbuffered channel from a separate goroutine, and
// the channel is closed once everything has been sent. Callers must receive
// until the channel is closed, otherwise that goroutine blocks forever.
func iterSources(graph *core.BuildGraph, target *core.BuildTarget, includeTransitive bool) <-chan sourcePair {
    ch := make(chan sourcePair)
    done := map[core.BuildLabel]bool{}
    tmpDir := target.TmpDir()
    var inner func(dependency *core.BuildTarget)
    inner = func(dependency *core.BuildTarget) {
        // Mark on entry (as anyDependencyHasChanged does) rather than only when
        // recursing; otherwise a dependency we don't recurse into would be
        // yielded again each time it was reached via another path.
        done[dependency.Label] = true
        if target == dependency {
            // This is the current build rule, so link its sources.
            for _, source := range dependency.Sources {
                if source.Label() != nil {
                    continue // We'll pick up dependent rules later.
                }
                fullPaths := source.FullPaths(graph)
                for i, sourcePath := range source.Paths(graph) {
                    ch <- sourcePair{fullPaths[i], path.Join(tmpDir, sourcePath)}
                }
            }
        } else {
            // This is a dependency of the rule, so link its outputs.
            for _, out := range dependency.Outputs {
                depPath := path.Join(dependency.OutDir(), out)
                tmpPath := path.Join(tmpDir, dependency.Label.PackageName, out)
                ch <- sourcePair{depPath, tmpPath}
            }
        }
        if !includeTransitive {
            return
        }
        if target == dependency || (target.NeedsTransitiveDependencies && !dependency.OutputIsComplete) {
            for _, dep := range dependency.Dependencies {
                if !done[dep.Label] {
                    inner(dep)
                }
            }
        }
    }
    go func() {
        inner(target)
        close(ch)
    }()
    return ch
}

// readRuleHashFile reads the contents of a rule hash file and splits it into
// three 16-byte hashes, returned in the order they are stored in the file:
// bytes 0-15, 16-31 and 32-47. writeRuleHashFile stores the rule hash, the
// config hash and the source hash in that order.
//
// All three results are empty if the file cannot be opened or holds fewer
// than 48 bytes.
func readRuleHashFile(filename string) ([]byte, []byte, []byte) {
    file, err := os.Open(filename)
    if err != nil {
        return []byte{}, []byte{}, []byte{}
    }
    defer file.Close()
    contents := make([]byte, 48) // These files are always exactly the same length
    // io.ReadFull keeps reading until the buffer is full; a single Read call
    // is allowed to return fewer bytes than requested even when more exist.
    if _, err := io.ReadFull(file, contents); err != nil {
        return []byte{}, []byte{}, []byte{}
    }
    // Cap each slice so that appending to one can never overwrite the next.
    return contents[0:16:16], contents[16:32:32], contents[32:48:48]
}

// writeRuleHashFile writes the rule hash file for the target: the rule hash,
// the config hash and the source hash, in that order, which is the layout
// readRuleHashFile expects.
//
// It returns an error if the sources cannot be hashed, if the file cannot be
// created, if any write fails, or if the total written is not exactly 48 bytes.
func writeRuleHashFile(state *core.BuildState, target *core.BuildTarget) error {
    // Hash the sources before creating the file, so a hashing failure doesn't
    // leave an empty hash file behind.
    sources, err := sourceHash(state.Graph, target)
    if err != nil {
        return err
    }
    file, err := os.Create(ruleHashFileName(target))
    if err != nil {
        return err
    }
    defer file.Close()
    // Check every write; a failure in an earlier one must not be masked by a
    // later one succeeding.
    n1, err := file.Write(ruleHash(target))
    if err != nil {
        return err
    }
    n2, err := file.Write(state.ConfigHash)
    if err != nil {
        return err
    }
    n3, err := file.Write(sources)
    if err != nil {
        return err
    }
    if n1 + n2 + n3 != 48 {
        return fmt.Errorf("Wrote %d bytes (%d, %d, %d) to rule hash file; should be 48", n1 + n2 + n3, n1, n2, n3)
    }
    return nil
}

// ruleHashFileName returns the path of the file in which the hashes for this
// target are stored: a file named ".rule_hash_<target name>" inside the
// target's output directory.
func ruleHashFileName(target *core.BuildTarget) string {
    hashFile := ".rule_hash_" + target.Label.Name
    return path.Join(target.OutDir(), hashFile)
}

// hasFileChanged reports whether the two paths hash differently according to
// pathHash. If either path cannot be hashed the answer is true; the error
// itself is not reported here and is left for something else to handle.
func hasFileChanged(oldFile string, newFile string) bool {
    oldHash, oldErr := pathHash(oldFile)
    newHash, newErr := pathHash(newFile)
    if oldErr != nil || newErr != nil {
        return true
    }
    return !bytes.Equal(oldHash, newHash)
}
