diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..44d0081 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +kadai3-1/manhdaovan/vendor +cover.out +kadai3-2/manhdaovan/vendor +vendor +kadai3-2/manhdaovan/tmp diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..fbc5b39 --- /dev/null +++ b/go.mod @@ -0,0 +1,9 @@ +module github.com/gopherdojo/dojo5 + +go 1.12 + +require ( + github.com/pkg/errors v0.8.0 + golang.org/x/net v0.0.0-20190619014844-b5b0513f8c1b + golang.org/x/sync v0.0.0-20190423024810-112230192c58 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..eb03380 --- /dev/null +++ b/go.sum @@ -0,0 +1,9 @@ +github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20190619014844-b5b0513f8c1b h1:lkjdUzSyJ5P1+eal9fxXX9Xg2BTfswsonKUse48C0uE= +golang.org/x/net v0.0.0-20190619014844-b5b0513f8c1b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/kadai3-2/manhdaovan/Makefile b/kadai3-2/manhdaovan/Makefile new file mode 100644 index 0000000..982b9e2 --- /dev/null +++ b/kadai3-2/manhdaovan/Makefile @@ -0,0 +1,13 @@ +COVERAGE_VIEW_MODE=html# or "func" +GOMOD=on#or "off" or "auto" + +install: + - GO111MODULE=${GOMOD} go mod vendor + - GO111MODULE=${GOMOD} go mod tidy +build: ./cmd/main.go + - GO111MODULE=${GOMOD} go build -a -o ./bin/mget ./cmd/main.go +test: ./pkg/mget/mget_test.go + - GO111MODULE=${GOMOD} go test -a -race ./pkg/... +coverage: test + - GO111MODULE=${GOMOD} go test -coverprofile=cover.out ./... + - GO111MODULE=${GOMOD} go tool cover -${COVERAGE_VIEW_MODE} cover.out diff --git a/kadai3-2/manhdaovan/README.md b/kadai3-2/manhdaovan/README.md new file mode 100644 index 0000000..e69de29 diff --git a/kadai3-2/manhdaovan/bin/mget b/kadai3-2/manhdaovan/bin/mget new file mode 100755 index 0000000..49a7598 Binary files /dev/null and b/kadai3-2/manhdaovan/bin/mget differ diff --git a/kadai3-2/manhdaovan/cmd/main.go b/kadai3-2/manhdaovan/cmd/main.go new file mode 100644 index 0000000..5355456 --- /dev/null +++ b/kadai3-2/manhdaovan/cmd/main.go @@ -0,0 +1,84 @@ +package main + +import ( + "context" + "flag" + "fmt" + "net/http" + "os" + "runtime" + + "github.com/gopherdojo/dojo5/kadai3-2/manhdaovan/pkg/mget" +) + +const ( + exitErr = 1 +) + +func main() { + ca := parseArgs() + if err := ca.validate(); err != nil { + fmt.Fprintln(os.Stderr, "given args error: ", err) + printHelp() + os.Exit(exitErr) + } + if _, err := os.Stat(ca.outPath); err != nil { + fmt.Fprintf(os.Stderr, "invalid output path: %s, err: %+v\n", ca.outPath, err) + printHelp() + os.Exit(exitErr) + } + + downloader := mget.NewMGet(http.DefaultClient, ca.numWorkers, mget.DefaultExitSigs, "", "") + outputPath, err := downloader.Download(context.Background(), ca.outPath, ca.url) + if err != nil { + fmt.Fprintf(os.Stderr, "download error: %+v\n", err) + printHelp() + os.Exit(exitErr) + } + + fmt.Println("Download completed. Output: ", outputPath) +} + +type cliArgs struct { + numWorkers uint + outPath string + url string +} + +func (ca *cliArgs) validate() error { + if ca.numWorkers == 0 { + return fmt.Errorf("number worker must be greater than 0") + } + if ca.url == "" { + return fmt.Errorf("no download url given") + } + + if ca.outPath == "" { + ca.outPath = "./out/" + } + + return nil +} + +func parseArgs() *cliArgs { + var ca cliArgs + flag.UintVar(&ca.numWorkers, "w", uint(runtime.NumCPU()), "number of workers") + flag.StringVar(&ca.outPath, "o", "", "output file path") + flag.Parse() + ca.url = flag.Arg(0) // last arg is download url + + return &ca +} + +func printHelp() { + helpStr := ` +mget -- a simple concurrency download tool +usage: $./bin/mget [-w] [-o] download-url +options: + -w unit + Number of workers that download in concurrent + -o string + Output of downloaded file. It can be a directory or a file path. +` + fmt.Println(helpStr) +} diff --git a/kadai3-2/manhdaovan/out/.gitignore b/kadai3-2/manhdaovan/out/.gitignore new file mode 100644 index 0000000..099e5d5 --- /dev/null +++ b/kadai3-2/manhdaovan/out/.gitignore @@ -0,0 +1,2 @@ +*/* +* \ No newline at end of file diff --git a/kadai3-2/manhdaovan/pkg/mget/chunk.go b/kadai3-2/manhdaovan/pkg/mget/chunk.go new file mode 100644 index 0000000..9daecc4 --- /dev/null +++ b/kadai3-2/manhdaovan/pkg/mget/chunk.go @@ -0,0 +1,34 @@ +package mget + +import ( + "fmt" + "path/filepath" +) + +type chunkInfo struct { + idx int + url string + size uint64 + rangeLow uint64 + rangeHigh uint64 +} + +func newChunkInfo(idx int, url string, fileSize, chunkSize uint64) *chunkInfo { + offset := uint64(idx) * chunkSize + if offset+chunkSize > fileSize { // last chunk + chunkSize = fileSize - offset + } + + return &chunkInfo{ + idx: idx, + url: url, + size: chunkSize, + rangeLow: offset, + rangeHigh: offset + chunkSize - 1, + } +} + +func chunkPath(dstDir, dstFile string, idx int) string { + chunkName := fmt.Sprintf("%s-%d.chunk", dstFile, idx) + return filepath.Join(dstDir, chunkName) +} diff --git a/kadai3-2/manhdaovan/pkg/mget/file.go b/kadai3-2/manhdaovan/pkg/mget/file.go new file mode 100644 index 0000000..8bb669a --- /dev/null +++ b/kadai3-2/manhdaovan/pkg/mget/file.go @@ -0,0 +1,22 @@ +package mget + +import ( + "path/filepath" + "strings" +) + +// parseDirAndFileName returns dir path and file name from given path +func parseDirAndFileName(path string) (dir, file string) { + lastSlashIdx := strings.LastIndex(path, "/") + dir = path[:lastSlashIdx+1] + if len(dir) == len(path) { // path is a dir + return dir, "" + } + + return dir, path[(len(dir) + 1):] +} + +// parseFileName returns file name from given url +func parseFileName(url string) string { + return filepath.Base(url) +} diff --git a/kadai3-2/manhdaovan/pkg/mget/mget.go b/kadai3-2/manhdaovan/pkg/mget/mget.go new file mode 100644 index 0000000..a4524a4 --- /dev/null +++ b/kadai3-2/manhdaovan/pkg/mget/mget.go @@ -0,0 +1,223 @@ +package mget + +import ( + "context" + "fmt" + "io" + "net/http" + "os" + "os/signal" + "path/filepath" + "strconv" + "syscall" + + "github.com/pkg/errors" + "golang.org/x/net/context/ctxhttp" + "golang.org/x/sync/errgroup" +) + +// DefaultExitSigs is default signals that make this tool exit +var DefaultExitSigs = []os.Signal{syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT} + +// MGet represents struct of downloader +type MGet struct { + workerNum uint + exitSigs []os.Signal + UserAgent string + Referer string + httpClient *http.Client + + dstDir string + dstFile string + + sigChan chan os.Signal + errChan chan error + doneChan chan struct{} +} + +// NewMGet inits new MGet +func NewMGet(httpClient *http.Client, workers uint, exitSigs []os.Signal, userAgent, referer string) *MGet { + m := &MGet{ + workerNum: workers, + exitSigs: exitSigs, + UserAgent: userAgent, + Referer: referer, + httpClient: httpClient, + } + m.init() + return m +} + +// Download returns downloaded file path and error of downloading. +func (m *MGet) Download(ctx context.Context, dst, url string) (string, error) { + m.setDirAndFileName(dst, url) + defer m.shutdown() + return m.download(ctx, url) +} + +func (m *MGet) init() { + if len(m.exitSigs) == 0 { // no signal given + m.exitSigs = DefaultExitSigs + } + m.errChan = make(chan error, 2) // cap for one error and closing + m.doneChan = make(chan struct{}, 2) // cap for one struct{} and closing + m.sigChan = make(chan os.Signal, 2) // cap for one sig and closing + signal.Notify(m.sigChan, m.exitSigs...) +} + +func (m *MGet) setDirAndFileName(dstPath, url string) { + m.dstDir, m.dstFile = parseDirAndFileName(dstPath) + if m.dstFile == "" { // dstPath is a directory, no custom file name given + m.dstFile = parseFileName(url) + } +} + +func (m *MGet) download(ctx context.Context, url string) (savedFilePath string, err error) { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + res, err := ctxhttp.Head(ctx, m.httpClient, url) + if err != nil { + err = errors.Wrap(err, "failed to head request: "+url) + return + } + + if res.Header.Get("Accept-Ranges") != "bytes" { + fmt.Println("[WARN]: server not support concurrency downloading") + m.workerNum = 1 + } + + fileSize, err := strconv.ParseUint(res.Header.Get("Content-Length"), 10, 64) + if err != nil { + err = errors.Wrapf(err, "cannot get file size") + return + } + + chunkSize := fileSize / uint64(m.workerNum) + if (fileSize % uint64(m.workerNum)) != 0 { + m.workerNum++ // recalculate workers to fit with chunks + } + + go func() { + eg, ctx := errgroup.WithContext(ctx) + for i := 0; i < int(m.workerNum); i++ { + chunk := newChunkInfo(i, url, fileSize, chunkSize) + eg.Go(func() error { + return m.downloadChunk(ctx, chunk) + }) + } + + if egErr := eg.Wait(); err != nil { + m.errChan <- errors.Wrapf(egErr, "error on chunks downloading") + return + } + + m.doneChan <- struct{}{} + }() + + select { + case <-ctx.Done(): + cancel() + err = fmt.Errorf("timeout") + case sig := <-m.sigChan: + err = fmt.Errorf("got quit sig: %s", sig.String()) + case downloadErr := <-m.errChan: + err = downloadErr + case <-m.doneChan: + err = nil + } + + if err != nil { + return + } + + savedFilePath, err = m.mergeChunks() + return +} + +func (m *MGet) downloadChunk(ctx context.Context, chunk *chunkInfo) error { + // create get request + req, err := http.NewRequest(http.MethodGet, chunk.url, nil) + if err != nil { + return errors.Wrapf(err, "failed to init NewRequest for chunk: %d", chunk.idx) + } + + // set download ranges + req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", chunk.rangeLow, chunk.rangeHigh)) + // set useragent + if m.UserAgent != "" { + req.Header.Set("User-Agent", m.UserAgent) + } + // set referer + if m.Referer != "" { + req.Header.Set("Referer", m.Referer) + } + + res, reqErr := ctxhttp.Do(ctx, m.httpClient, req) + if reqErr != nil { + return errors.Wrapf(reqErr, "cannot get data for chunk %d", chunk.idx) + } + defer res.Body.Close() + + chunkPath := chunkPath(m.dstDir, m.dstFile, chunk.idx) + chunkFile, createErr := os.Create(chunkPath) + if createErr != nil { + return errors.Wrapf(reqErr, "cannot create chunk %d at %s", chunk.idx, chunkPath) + } + defer chunkFile.Close() + + if _, err := io.Copy(chunkFile, res.Body); err != nil { + return errors.Wrapf(reqErr, "cannot save data for chunk %d to %s", chunk.idx, chunkPath) + } + + return nil +} + +func (m *MGet) mergeChunks() (string, error) { + savedPath := filepath.Join(m.dstDir, m.dstFile) + resultFile, err := os.Create(savedPath) + if err != nil { + return "", errors.Wrapf(err, "cannot create result file") + } + defer resultFile.Close() + + mergeChunk := func(srcFile *os.File, chunkPath string) error { + chunkData, err := os.Open(chunkPath) + defer chunkData.Close() + + if err != nil { + return err + } + if _, err := io.Copy(resultFile, chunkData); err != nil { + return err + } + + return nil + } + + for i := 0; i < int(m.workerNum); i++ { + chunkIdx := i + chunkPath := chunkPath(m.dstDir, m.dstFile, chunkIdx) + if err := mergeChunk(resultFile, chunkPath); err != nil { + return "", errors.Wrapf(err, "cannot merge chunk: %d", chunkIdx) + } + } + + return savedPath, nil +} + +func (m *MGet) shutdown() { + close(m.sigChan) + close(m.errChan) + close(m.doneChan) + m.cleanChunks() +} + +func (m *MGet) cleanChunks() { + for chunkIdx := 0; chunkIdx < int(m.workerNum); chunkIdx++ { + chunkPath := chunkPath(m.dstDir, m.dstFile, chunkIdx) + if err := os.Remove(chunkPath); err != nil { + fmt.Fprintf(os.Stderr, "error on remove chunk %s data: %v\n", chunkPath, err) + } + } +} diff --git a/kadai3-2/manhdaovan/pkg/mget/mget_test.go b/kadai3-2/manhdaovan/pkg/mget/mget_test.go new file mode 100644 index 0000000..8ee6698 --- /dev/null +++ b/kadai3-2/manhdaovan/pkg/mget/mget_test.go @@ -0,0 +1,136 @@ +package mget_test + +import ( + "context" + "crypto/md5" + "io/ioutil" + "net/http" + "net/http/httptest" + "strconv" + "strings" + "testing" + + "github.com/gopherdojo/dojo5/kadai3-2/manhdaovan/pkg/mget" +) + +// initMockServerFile inits a httptest.Server and a callback to close server +func initMockServerFile(t *testing.T, initFnc func(t *testing.T) http.HandlerFunc) (*httptest.Server, func()) { + t.Helper() + ts := httptest.NewServer(initFnc(t)) + return ts, func() { ts.Close() } +} + +type mockServerFile struct { + file string +} + +func (msf *mockServerFile) mockServerHandler(t *testing.T) http.HandlerFunc { + t.Helper() + return func(w http.ResponseWriter, r *http.Request) { + + w.Header().Set("Accept-Ranges", "bytes") + headerRange := r.Header.Get("Range") + + body := func() []byte { + testDataBytes, err := ioutil.ReadFile(msf.file) + if err != nil { + t.Errorf("cannot read test file:%s, error: %v", msf.file, err) + } + + if headerRange == "" { + return testDataBytes + } + + rangeItems := strings.Split(headerRange, "=") + if rangeItems[0] != "bytes" { + t.Errorf("range header should be bytes, got: %s", headerRange) + } + rangeValues := strings.Split(rangeItems[1], "-") + + rangeFrom, err := strconv.Atoi(rangeValues[0]) + if err != nil { + t.Errorf("invalid range-from value: %v, error: %v", rangeValues[0], err) + } + + rangeTo, err := strconv.Atoi(rangeValues[1]) + if err != nil { + t.Errorf("invalid range-to value: %v, error: %v", rangeValues[1], err) + } + + return testDataBytes[rangeFrom:rangeTo] + }() + + w.Header().Set("Content-Length", strconv.Itoa(len(body))) + w.WriteHeader(http.StatusPartialContent) + w.Write(body) + } +} + +func TestMGet_Download(t *testing.T) { + type fields struct { + workerNum uint + } + type args struct { + dst string + } + tests := []struct { + name string + fields fields + args args + want string + wantErr bool + }{ + { + name: "file bytes are not divisible by workers", + fields: fields{workerNum: 5}, + args: args{dst: "./testdata/out/"}, + want: "testdata/out/378bytes.text", + wantErr: false, + }, + { + name: "file bytes are divisible by workers", + fields: fields{workerNum: 3}, + args: args{dst: "./testdata/out/"}, + want: "testdata/out/378bytes.text", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockServerFile := mockServerFile{file: "./testdata/378bytes.text"} + mockServer, shutdownMockServer := initMockServerFile(t, mockServerFile.mockServerHandler) + defer shutdownMockServer() + + m := mget.NewMGet( + http.DefaultClient, + tt.fields.workerNum, + mget.DefaultExitSigs, + "", "", + ) + got, err := m.Download(context.Background(), tt.args.dst, mockServer.URL+"/file/378bytes.text") + if (err != nil) != tt.wantErr { + t.Errorf("MGet.Download() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want || !isSameFileContent(t, got, tt.want) { + t.Errorf("MGet.Download() = %v, want %v", got, tt.want) + } + }) + } +} + +func isSameFileContent(t *testing.T, file1, file2 string) bool { + t.Helper() + bytesFile1, err := ioutil.ReadFile(file1) + if err != nil { + t.Errorf("cannot open file: %s", file1) + return false + } + bytesFile2, err := ioutil.ReadFile(file2) + if err != nil { + t.Errorf("cannot open file: %s", file2) + return false + } + + return md5.Sum(bytesFile1) == md5.Sum(bytesFile2) +} diff --git a/kadai3-2/manhdaovan/pkg/mget/testdata/378bytes.text b/kadai3-2/manhdaovan/pkg/mget/testdata/378bytes.text new file mode 100644 index 0000000..8a95983 --- /dev/null +++ b/kadai3-2/manhdaovan/pkg/mget/testdata/378bytes.text @@ -0,0 +1,4 @@ +また、次のようにレシーバを束縛していなくても変数に入れることができます。 +その際、変数の型はfunc (c *Counter)のように、レシーバが第1引数になります。 +なお、引数があるメソッドの場合は、レシーバが第1引数に、第1引数が第2引数に、 +のように1つずつずれる形になります。 \ No newline at end of file