diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 018a826..5ad512f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -13,8 +13,7 @@ jobs: - name: Setup go uses: actions/setup-go@v5 with: - go-version: "1.21" - cache: false + go-version: "stable" - name: Build run: "go build ./..." - name: Test @@ -28,13 +27,11 @@ jobs: - name: Setup go uses: actions/setup-go@v5 with: - go-version: "1.21" - cache: false + go-version: "stable" - name: Lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v6 with: - version: v1.55.1 - args: --config=.golangci.yaml + version: latest codeqL-build: name: CodeQL build runs-on: ubuntu-latest diff --git a/.golangci.yaml b/.golangci.yaml index 415485c..4a758aa 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,5 +1,6 @@ output: - format: github-actions + formats: + - format: colored-line-number print-issued-lines: true print-linter-name: true uniq-by-line: true @@ -14,6 +15,7 @@ issues: max-same-issues: 0 severity: + default-severity: error case-sensitive: true linters: @@ -27,16 +29,15 @@ linters: - bodyclose - containedctx - cyclop - - deadcode - decorder - depguard - dogsled - dupl - dupword + - err113 - errchkjson - execinquery - exhaustive - - exhaustivestruct - exhaustruct - exportloopref - forbidigo @@ -53,11 +54,9 @@ linters: - gocyclo - godot - godox - - goerr113 - gofumpt - goheader - goimports - - golint - gomnd - gomoddirectives - gomodguard @@ -65,18 +64,16 @@ linters: - gosec - gosmopolitan - grouper - - ifshort - importas - interfacebloat - - interfacer - ireturn - lll - loggercheck - maintidx - makezero - - maligned - mirror - misspell + - mnd - musttag - nakedret - nestif @@ -86,7 +83,6 @@ linters: - noctx - nolintlint - nonamedreturns - - nosnakecase - nosprintfhostport - paralleltest - prealloc @@ -95,9 +91,7 @@ linters: - reassign - revive - rowserrcheck - - scopelint - sqlclosecheck - - structcheck - stylecheck - tagalign - tagliatelle @@ -109,7 +103,6 @@ linters: - unconvert - unparam - usestdlibvars - - varcheck - varnamelen - wastedassign - whitespace diff --git a/Dockerfile b/Dockerfile index 01d27c6..4381392 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21 AS build +FROM golang:1.23 AS build WORKDIR /go/src/app diff --git a/go.mod b/go.mod index acfab46..2d2a0e8 100644 --- a/go.mod +++ b/go.mod @@ -1,27 +1,50 @@ module github.com/nlnwa/fai -go 1.21 +go 1.22.0 + +toolchain go1.22.7 require ( - github.com/nlnwa/gowarc v1.1.1 - github.com/prometheus/client_golang v1.17.0 + github.com/minio/minio-go/v7 v7.0.77 + github.com/prometheus/client_golang v1.20.4 + github.com/spf13/pflag v1.0.5 + github.com/spf13/viper v1.19.0 ) require ( github.com/beorn7/perks v1.0.1 // indirect - github.com/bits-and-blooms/bitset v1.10.0 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/google/uuid v1.4.0 // indirect - github.com/klauspost/compress v1.17.2 // indirect - github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect - github.com/nlnwa/whatwg-url v0.4.0 // indirect - github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_model v0.5.0 // indirect - github.com/prometheus/common v0.45.0 // indirect - github.com/prometheus/procfs v0.12.0 // indirect - github.com/prometheus/prometheus v0.47.2 // indirect - golang.org/x/net v0.17.0 // indirect - golang.org/x/sys v0.13.0 // indirect - golang.org/x/text v0.13.0 // indirect - google.golang.org/protobuf v1.31.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-ini/ini v1.67.0 // indirect + github.com/goccy/go-json v0.10.3 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect + github.com/klauspost/compress v1.17.10 // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/magiconair/properties v1.8.7 // indirect + github.com/minio/md5-simd v1.1.2 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.60.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/rs/xid v1.6.0 // indirect + github.com/sagikazarmark/locafero v0.6.0 // indirect + github.com/sagikazarmark/slog-shim v0.1.0 // indirect + github.com/sourcegraph/conc v0.3.0 // indirect + github.com/spf13/afero v1.11.0 // indirect + github.com/spf13/cast v1.7.0 // indirect + github.com/subosito/gotenv v1.6.0 // indirect + go.uber.org/atomic v1.11.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/crypto v0.28.0 // indirect + golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/text v0.19.0 // indirect + google.golang.org/protobuf v1.35.1 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 9aa5190..d6316a1 100644 --- a/go.sum +++ b/go.sum @@ -1,91 +1,143 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bits-and-blooms/bitset v1.5.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= -github.com/bits-and-blooms/bitset v1.10.0 h1:ePXTeiPEazB5+opbv5fr8umg2R/1NlzgDsyepwsSr88= -github.com/bits-and-blooms/bitset v1.10.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= -github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= -github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= -github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= -github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= -github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= -github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= -github.com/nlnwa/gowarc v1.1.1 h1:C0kWp8aX3E/+/TvaqlO6I86drYO8qdZDGbJLD+aJilk= -github.com/nlnwa/gowarc v1.1.1/go.mod h1:aFbCuh7ZaGlKO2IAH7GbMgp0b2K8kzK2cb0zWsXWp/8= -github.com/nlnwa/whatwg-url v0.4.0 h1:B3kFb5EL7KILeBkhrlQvFi41Ex0p4ropVA9brt5ungI= -github.com/nlnwa/whatwg-url v0.4.0/go.mod h1:pLzpJjFPtA+n7RCLvp0GBxvDHa/2ckNCBK9mfEeNOMQ= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= +github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= +github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= +github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= +github.com/minio/minio-go/v7 v7.0.71 h1:No9XfOKTYi6i0GnBj+WZwD8WP5GZfL7n7GOjRqCdAjA= +github.com/minio/minio-go/v7 v7.0.71/go.mod h1:4yBA8v80xGA30cfM3fz0DKYMXunWl/AV/6tWEs9ryzo= +github.com/minio/minio-go/v7 v7.0.77 h1:GaGghJRg9nwDVlNbwYjSDJT1rqltQkBFDsypWX1v3Bw= +github.com/minio/minio-go/v7 v7.0.77/go.mod h1:AVM3IUN6WwKzmwBxVdjzhH8xq+f57JSbbvzqvUzR6eg= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= +github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= +github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q= -github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY= -github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= -github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= -github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= -github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= -github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= -github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/prometheus/prometheus v0.47.2 h1:jWcnuQHz1o1Wu3MZ6nMJDuTI0kU5yJp9pkxh8XEkNvI= -github.com/prometheus/prometheus v0.47.2/go.mod h1:J/bmOSjgH7lFxz2gZhrWEZs2i64vMS+HIuZfmYNhJ/M= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= +github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= +github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.54.0 h1:ZlZy0BgJhTwVZUn7dLOkwCZHUkrAqd3WYtcFCWnM1D8= +github.com/prometheus/common v0.54.0/go.mod h1:/TQgMJP5CuVYveyT7n/0Ix8yLNNXy9yRSkhnLTHPDIQ= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= +github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= +github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= +github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= +github.com/sagikazarmark/locafero v0.6.0 h1:ON7AQg37yzcRPU69mt7gwhFEBwxI6P9T4Qu3N51bwOk= +github.com/sagikazarmark/locafero v0.6.0/go.mod h1:77OmuIc6VTraTXKXIs/uvUxKGUXjE1GbemJYHqdNjX0= +github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= +github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= +github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= +github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= +github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= +github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= +github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= +github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= +github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= +github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= -go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= -golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= +go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= +golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= -golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/checksum/checksum.go b/internal/checksum/checksum.go deleted file mode 100644 index d3f50de..0000000 --- a/internal/checksum/checksum.go +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2023 National Library of Norway. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package checksum - -import ( - "crypto/md5" - "encoding/hex" - "io" - "os" - "path/filepath" -) - -// MD5Sum returns the md5 checksum of the given file encoded as a hex string. -func MD5Sum(filepath string) (string, error) { - f, err := os.Open(filepath) - if err != nil { - return "", err - } - defer f.Close() - - h := md5.New() - if _, err := io.Copy(h, f); err != nil { - return "", err - } - - return hex.EncodeToString(h.Sum(nil)), nil -} - -// separator is used to separate the checksum from the filepath in the checksum file. -const separator = " *" - -// CreateChecksumFile creates a checksum file for the given file. -// It returns the path to the created checksum file. -func CreateChecksumFile(file string, checksum string, extension string) (string, error) { - // Don't allow empty checksum - if checksum == "" { - panic("checksum is empty") - } - - checksumFile := file + extension - content := checksum + separator + filepath.Base(file) + "\n" - - // Create checksum file - f, err := os.Create(checksumFile) - if err != nil { - return "", err - } - defer f.Close() - - // Write content to checksum file - _, err = f.WriteString(content) - if err != nil { - return "", err - } - - return checksumFile, nil -} diff --git a/internal/checksum/checksum_test.go b/internal/checksum/checksum_test.go deleted file mode 100644 index 200c50f..0000000 --- a/internal/checksum/checksum_test.go +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2023 National Library of Norway. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package checksum - -import ( - "os" - "path/filepath" - "testing" -) - -func TestMD5Sum(t *testing.T) { - tmpDir := t.TempDir() - f, err := os.CreateTemp(tmpDir, "test") - - if err != nil { - t.Fatalf("failed to create test file: %v", err) - } - _, err = f.WriteString("Test string") - if err != nil { - t.Fatalf("failed to write to test file: %v", err) - } - testFile := f.Name() - defer os.Remove(testFile) - defer f.Close() - - got, err := MD5Sum(testFile) - if err != nil { - t.Errorf("expected no error, but got %v", err) - } - want := "0fd3dbec9730101bff92acc820befc34" - if got != want { - t.Errorf("expected %s, but got %s", want, got) - } -} - -func TestCreateChecksumFile(t *testing.T) { - tmpDir := t.TempDir() - f, err := os.CreateTemp(tmpDir, "test") - - if err != nil { - t.Fatalf("failed to create test file: %v", err) - } - _, err = f.WriteString("Test string") - if err != nil { - t.Fatalf("failed to write to test file: %v", err) - } - testFile := f.Name() - defer os.Remove(testFile) - defer f.Close() - - wantHash := "0fd3dbec9730101bff92acc820befc34" - fileHash, err := MD5Sum(testFile) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - checksumFile, err := CreateChecksumFile(testFile, fileHash, ".md5") - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - defer os.Remove(checksumFile) - - b, err := os.ReadFile(checksumFile) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - got := string(b) - want := wantHash + separator + filepath.Base(testFile) + "\n" - - if got != want { - t.Errorf("expected %s, got %s", want, got) - } -} diff --git a/internal/fai/fai.go b/internal/fai/fai.go index afa3afc..6023896 100644 --- a/internal/fai/fai.go +++ b/internal/fai/fai.go @@ -19,126 +19,84 @@ package fai import ( "context" "errors" - "log/slog" + "fmt" "os" "path/filepath" "time" - - "github.com/nlnwa/fai/internal/checksum" - "github.com/nlnwa/fai/internal/metrics" - "github.com/nlnwa/fai/internal/queue" - "github.com/nlnwa/fai/internal/warc" ) -type fAI struct { - sourceDir string - validTargetDir string - invalidTargetDir string - tmpDir string - concurrency int - sleep time.Duration - globPattern string - logger *slog.Logger +type options struct { + sourceDir string + sleep time.Duration + globPattern string + inspect func(string) } -func New(options ...Option) (*fAI, error) { - opts, err := validateOptions(options...) - if err != nil { - return nil, err - } +type option func(opts *options) - return &fAI{ - sourceDir: opts.sourceDir, - validTargetDir: opts.validTargetDir, - invalidTargetDir: opts.invalidTargetDir, - tmpDir: opts.tmpDir, - concurrency: opts.concurrency, - sleep: opts.sleep, - globPattern: opts.globPattern, - logger: opts.logger, - }, nil +func WithSourceDir(dir string) option { + return func(opts *options) { + opts.sourceDir = dir + } } -const checksumFileSuffix = ".md5" - -func (f *fAI) processFile(file string) { - if _, err := os.Stat(file); errors.Is(err, os.ErrNotExist) { - // file does not exist so skip it - return +func WithSleep(sleep time.Duration) option { + return func(opts *options) { + opts.sleep = sleep } +} - // calculate checksum - md5sum, err := checksum.MD5Sum(file) - if err != nil { - f.logger.Error("Failed to calculate checksum", "file", file, "error", err) - return - } - // create checksum file - checksumFile, err := checksum.CreateChecksumFile(file, md5sum, checksumFileSuffix) - if err != nil { - f.logger.Error("Failed to create checksum file", "file", file, "error", err) - return +func WithGlobPattern(globPattern string) option { + return func(opts *options) { + opts.globPattern = globPattern } +} - // validate file - isValid, err := warc.IsValid(file, f.tmpDir) - if err != nil { - f.logger.Error("Failed to validate file", "file", file, "error", err) - return +func WithInspector(inspector func(string)) option { + return func(opts *options) { + opts.inspect = inspector } +} - targetDir := f.validTargetDir - if !isValid { - targetDir = f.invalidTargetDir - metrics.ValidationError() +func New(opts ...option) (*options, error) { + o := &options{} + for _, opt := range opts { + opt(o) } - newChecksumFile := filepath.Join(targetDir, filepath.Base(checksumFile)) - newFile := filepath.Join(targetDir, filepath.Base(file)) - - // Move checksum file and file to target directory. - // - // The order is important because a failed move of the checksum file - // will result in the file being checksummed again (ok). If the file - // is moved first and the checksum file fails to move then the - // checksum file will never be created (not ok). + var err error - // move checksum file to new location - err = os.Rename(checksumFile, newChecksumFile) + // assert source directory exists + o.sourceDir, err = filepath.Abs(o.sourceDir) if err != nil { - f.logger.Error("Failed to move checksum file", "source", checksumFile, "target", newChecksumFile, "error", err) - return + return nil, fmt.Errorf("failed to get absolute path of source directory: %w", err) } - - // move file to new location - err = os.Rename(file, newFile) - if err != nil { - f.logger.Error("Failed to move file", "source", file, "target", newFile, "error", err) - return + // assert source + if info, err := os.Stat(o.sourceDir); errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("source directory does not exist: %w", err) + } else if err != nil { + return nil, fmt.Errorf("failed to stat source directory: %w", err) + } else if !info.IsDir() { + return nil, errors.New("source is not a directory") } - // get file size - fileInfo, err := os.Stat(newFile) + o.globPattern = filepath.Join(o.sourceDir, o.globPattern) + + // assert glob pattern is valid + _, err = filepath.Glob(o.globPattern) if err != nil { - f.logger.Error("Failed to get file size", "file", newFile, "error", err) - return + return nil, fmt.Errorf("invalid glob pattern: %w", err) } - fileSizeBytes := fileInfo.Size() - - metrics.Size(fileSizeBytes) - name := filepath.Base(newFile) - f.logger.Info("Processed file", "name", name, "size", fileSizeBytes, "md5", md5sum, "valid", isValid) + return o, nil } -// Run starts the FAI. -// It will run until the context is cancelled or stop after one pass if the sleep duration is zero. -func (f *fAI) Run(ctx context.Context) { - f.logger.Info("Starting FAI", "sourceDir", f.sourceDir, "validTargetDir", f.validTargetDir, "invalidTargetDir", f.invalidTargetDir, "concurrency", f.concurrency, "sleep", f.sleep) - - queue := queue.NewWorkQueue(f.processFile, f.concurrency) - defer queue.CloseAndWait() - +// Run starts the first article inspection. +// Once every sleep duration the inspect function is run on all files in the +// source source directory matching the glob pattern. +// It will run until the context is done or after one pass if the sleep +// duration is configured as zero. +func (f *options) Run(ctx context.Context) { for { files, _ := filepath.Glob(f.globPattern) for _, file := range files { @@ -146,8 +104,8 @@ func (f *fAI) Run(ctx context.Context) { case <-ctx.Done(): return default: - queue.Add(file) } + f.inspect(file) } select { case <-ctx.Done(): diff --git a/internal/fai/fai_test.go b/internal/fai/fai_test.go index 6310b9b..f97e3e5 100644 --- a/internal/fai/fai_test.go +++ b/internal/fai/fai_test.go @@ -2,62 +2,61 @@ package fai import ( "context" + "errors" "os" - "path/filepath" "testing" + + "github.com/nlnwa/fai/internal/queue" ) -// createTestFile creates a test file in the given directory -func createTestFile(t *testing.T, dir string) *os.File { - t.Helper() - f, _ := os.CreateTemp(dir, "testfile") - return f +func TestNew(t *testing.T) { + // Test glob pattern is invalid + _, err := New( + WithGlobPattern("["), + ) + if err == nil { + t.Error("expected error when glob pattern is invalid") + } } func TestRun(t *testing.T) { sourceDir := t.TempDir() - targetDir := t.TempDir() - - testFiles := []struct { - file *os.File - expectedDir string - isValid bool - }{ - { - file: createTestFile(t, sourceDir), - expectedDir: targetDir, - }, - { - file: createTestFile(t, sourceDir), - expectedDir: targetDir, - }, + var testFiles []*os.File + + for range 10 { + f, _ := os.CreateTemp(sourceDir, "testfile") + testFiles = append(testFiles, f) + } + + worker := func(path string) { + err := os.Remove(path) + if err != nil { + t.Errorf("failed to remove file: %v", err) + } } + q := queue.NewWorkQueue(worker, len(testFiles)) + fai, err := New( - WithConcurrency(len(testFiles)), - WithSleep(0), + WithSleep(0), // single pass WithSourceDir(sourceDir), - WithValidTargetDir(targetDir), - WithInvalidTargetDir(targetDir), - WithTmpDir(t.TempDir()), + WithInspector(q.Add), + WithGlobPattern("testfile*"), ) if err != nil { t.Fatalf("failed to create fai: %v", err) } - // run fai + // run fai (add files to queue) fai.Run(context.Background()) + // close queue and wait for all workers to finish + q.CloseAndWait() for _, testFile := range testFiles { - // check that test file has been moved - _, err = os.Stat(filepath.Join(testFile.expectedDir, filepath.Base(testFile.file.Name()))) - if err != nil { - t.Errorf("failed to stat test file '%v'", err) - } - // check that checksum file has been created and moved - _, err = os.Stat(filepath.Join(testFile.expectedDir, filepath.Base(testFile.file.Name())+".md5")) - if err != nil { - t.Errorf("failed to stat checksum file: %v", err) + // assert that test file is removed (as per worker function) + _, err = os.Stat(testFile.Name()) + if !errors.Is(err, os.ErrNotExist) { + t.Errorf("test file still exists: %s", testFile.Name()) } } } diff --git a/internal/fai/options.go b/internal/fai/options.go deleted file mode 100644 index b1087e8..0000000 --- a/internal/fai/options.go +++ /dev/null @@ -1,158 +0,0 @@ -package fai - -import ( - "errors" - "fmt" - "log/slog" - "os" - "path/filepath" - "runtime" - "strings" - "time" - - "github.com/nlnwa/fai/internal/log" -) - -type options struct { - sourceDir string - validTargetDir string - invalidTargetDir string - tmpDir string - concurrency int - sleep time.Duration - globPattern string - logger *slog.Logger -} - -func defaultOptions() *options { - return &options{ - sourceDir: "", - validTargetDir: "", - invalidTargetDir: "", - tmpDir: "", - concurrency: runtime.NumCPU(), - sleep: 1 * time.Second, - globPattern: "*", - logger: log.Noop(), - } -} - -func validateOptions(options ...Option) (*options, error) { - o := defaultOptions() - for _, opt := range options { - opt(o) - } - - if o.concurrency < 1 { - return nil, fmt.Errorf("concurrency must be greater than 0") - } - - var err error - - if (o.sourceDir == o.validTargetDir || o.sourceDir == o.invalidTargetDir) && (strings.HasSuffix(o.globPattern, "*") || strings.HasSuffix(o.globPattern, checksumFileSuffix)) { - return nil, fmt.Errorf("source and target directories cannot be the same when glob pattern is a wildcard or glob pattern ends with %s", checksumFileSuffix) - } - - // make sure source, temp and target directories are absolute paths - o.sourceDir, err = filepath.Abs(o.sourceDir) - if err != nil { - return nil, fmt.Errorf("failed to get absolute path of source directory: %w", err) - } - if info, err := os.Stat(o.sourceDir); errors.Is(err, os.ErrNotExist) { - return nil, fmt.Errorf("source directory does not exist: %w", err) - } else if !info.IsDir() { - return nil, fmt.Errorf("source directory is not a directory: %w", err) - } - - o.validTargetDir, err = filepath.Abs(o.validTargetDir) - if err != nil { - return nil, fmt.Errorf("failed to get absolute path of valid target directory: %w", err) - } - if info, err := os.Stat(o.validTargetDir); errors.Is(err, os.ErrNotExist) { - return nil, fmt.Errorf("valid target directory does not exist: %w", err) - } else if !info.IsDir() { - return nil, fmt.Errorf("valid target directory is not a directory: %w", err) - } - o.invalidTargetDir, err = filepath.Abs(o.invalidTargetDir) - if err != nil { - return nil, fmt.Errorf("failed to get absolute path of invalid target directory: %w", err) - } - if info, err := os.Stat(o.invalidTargetDir); errors.Is(err, os.ErrNotExist) { - return nil, fmt.Errorf("invalid target directory does not exist: %w", err) - } else if !info.IsDir() { - return nil, fmt.Errorf("invalid target directory is not a directory: %w", err) - } - o.tmpDir, err = filepath.Abs(o.tmpDir) - if err != nil { - return nil, fmt.Errorf("failed to get absolute path of tmp directory: %w", err) - } - if info, err := os.Stat(o.tmpDir); errors.Is(err, os.ErrNotExist) { - return nil, fmt.Errorf("tmp directory does not exist: %w", err) - } else if !info.IsDir() { - return nil, fmt.Errorf("tmp directory is not a directory: %w", err) - } - - o.globPattern = filepath.Join(o.sourceDir, o.globPattern) - - // Test glob pattern is valid - _, err = filepath.Glob(o.globPattern) - if err != nil { - return nil, fmt.Errorf("invalid glob pattern: %w", err) - } - - if o.logger == nil { - return nil, fmt.Errorf("logger cannot be nil") - } - - return o, nil -} - -type Option func(opts *options) - -func WithSourceDir(dir string) Option { - return func(opts *options) { - opts.sourceDir = dir - } -} - -func WithValidTargetDir(dir string) Option { - return func(opts *options) { - opts.validTargetDir = dir - } -} - -func WithInvalidTargetDir(dir string) Option { - return func(opts *options) { - opts.invalidTargetDir = dir - } -} - -func WithTmpDir(dir string) Option { - return func(opts *options) { - opts.tmpDir = dir - } -} - -func WithConcurrency(concurrency int) Option { - return func(opts *options) { - opts.concurrency = concurrency - } -} - -func WithSleep(sleep time.Duration) Option { - return func(opts *options) { - opts.sleep = sleep - } -} - -func WithGlobPattern(globPattern string) Option { - return func(opts *options) { - opts.globPattern = globPattern - } -} - -func WithLogger(logger *slog.Logger) Option { - return func(opts *options) { - opts.logger = logger - } -} diff --git a/internal/fai/options_test.go b/internal/fai/options_test.go deleted file mode 100644 index c20caea..0000000 --- a/internal/fai/options_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package fai - -import ( - "os" - "testing" -) - -func TestValidateOptions(t *testing.T) { - testDir := t.TempDir() - testFile, err := os.CreateTemp(testDir, "test") - if err != nil { - t.Fatalf("failed to create test file: %v", err) - } - - // Test glob pattern is a wildcard and source and valid target directory are the same - _, err = validateOptions( - WithSourceDir(testDir), - WithValidTargetDir(testDir), - WithGlobPattern("*"), - ) - if err == nil { - t.Error("expected error when source and valid target directory are the same and glob pattern is '*'") - } - - // Test glob pattern is a wildcard and source and invalid target directory are the same - _, err = validateOptions( - WithSourceDir(testDir), - WithInvalidTargetDir(testDir), - WithGlobPattern("*"), - ) - if err == nil { - t.Error("expected error when source and invalid target directory are the same and glob pattern is '*'") - } - - // Test glob pattern ends with checksum file suffix and source and target directory are the same - _, err = validateOptions( - WithSourceDir(testDir), - WithValidTargetDir(testDir), - WithGlobPattern("*."+checksumFileSuffix), - ) - if err == nil { - t.Errorf("expected error when source and target directory are the same and glob pattern ends with '%s'", checksumFileSuffix) - } - - // Test valid target directory is not a directory - _, err = validateOptions( - WithSourceDir(testDir), - WithValidTargetDir(testFile.Name()), - ) - if err == nil { - t.Error("expected error when valid target directory is not a directory") - } - - // Test concurrency cannot be zero - _, err = validateOptions( - WithConcurrency(0), - ) - if err == nil { - t.Error("expected error when concurrency is zero") - } - - // Test glob pattern is invalid - _, err = validateOptions( - WithGlobPattern("["), - ) - if err == nil { - t.Error("expected error when glob pattern is invalid") - } - - // Test logger cannot be nil - _, err = validateOptions( - WithLogger(nil), - ) - if err == nil { - t.Error("expected error when logger is nil") - } -} diff --git a/internal/fai/s3.go b/internal/fai/s3.go new file mode 100644 index 0000000..498d501 --- /dev/null +++ b/internal/fai/s3.go @@ -0,0 +1,121 @@ +package fai + +import ( + "context" + "crypto/md5" + "encoding/hex" + "errors" + "io" + "os" + "path/filepath" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" +) + +type s3options struct { + client *minio.Client + bucketName string + address string + accessKeyID string + secretAccessKey string + token string + useSSL bool +} + +type s3Option func(*s3options) + +func WithS3Address(address string) s3Option { + return func(opts *s3options) { + opts.address = address + } +} + +func WithS3AccessKeyID(accessKeyID string) s3Option { + return func(opts *s3options) { + opts.accessKeyID = accessKeyID + } +} + +func WithS3SecretAccessKey(secretAccessKey string) s3Option { + return func(opts *s3options) { + opts.secretAccessKey = secretAccessKey + } +} + +func WithS3Token(token string) s3Option { + return func(opts *s3options) { + opts.token = token + } +} + +func WithS3BucketName(bucket string) s3Option { + return func(opts *s3options) { + opts.bucketName = bucket + } +} + +func NewS3Uploader(options ...s3Option) (*s3options, error) { + o := new(s3options) + + for _, opt := range options { + opt(o) + } + if o.bucketName == "" { + return nil, errors.New("s3 bucket name is required") + } + + s3credentials := credentials.NewStaticV4(o.accessKeyID, o.secretAccessKey, o.token) + o.useSSL = true + s3Client, err := minio.New(o.address, &minio.Options{ + Creds: s3credentials, + Secure: o.useSSL, + }) + if err != nil { + return nil, err + } + o.client = s3Client + + return o, nil +} + +// Upload uploads the file at filePath to S3. +func (f *s3options) Upload(ctx context.Context, filePath string) (minio.UploadInfo, error) { + // Calculate the MD5 checksum of the file + md5sum, err := calculateMD5(filePath) + if err != nil { + return minio.UploadInfo{}, err + } + + bucketName := f.bucketName + objectName := filepath.Base(filePath) + options := minio.PutObjectOptions{ + UserMetadata: map[string]string{ + "x-amz-meta-md5": md5sum, + }, + } + + return f.client.FPutObject(ctx, bucketName, objectName, filePath, options) +} + +// calculateMD5 calculates the MD5 checksum of a given file. +func calculateMD5(filePath string) (string, error) { + // Open the file + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + + // Create a new MD5 hash + hash := md5.New() + + // Copy file data into the hash + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + + // Get the MD5 sum and encode it as a hexadecimal string + md5sum := hex.EncodeToString(hash.Sum(nil)) + return md5sum, nil +} diff --git a/internal/log/noop.go b/internal/log/noop.go deleted file mode 100644 index 87d56a8..0000000 --- a/internal/log/noop.go +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2023 National Library of Norway. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package log - -import ( - "context" - "log/slog" -) - -// noopHandler is a slog.Handler that does nothing. -type noopHandler struct{} - -func (n noopHandler) Enabled(context.Context, slog.Level) bool { return false } -func (n noopHandler) Handle(context.Context, slog.Record) error { return nil } -func (n noopHandler) WithAttrs([]slog.Attr) slog.Handler { return n } -func (n noopHandler) WithGroup(string) slog.Handler { return n } - -func Noop() *slog.Logger { - return slog.New(noopHandler{}) -} diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 50c68c0..f315978 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -17,12 +17,22 @@ package metrics import ( + "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" ) const namespace = "fai" +var duration = promauto.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "duration_seconds", + Help: "Duration of operations in seconds.", + // 1s, 10s, 30s, 1m, 10m, 30m + Buckets: []float64{1, 10, 30, 60, 600, 1800}, +}) + var filesize = promauto.NewHistogram(prometheus.HistogramOpts{ Namespace: namespace, Name: "file_size_bytes", @@ -31,18 +41,11 @@ var filesize = promauto.NewHistogram(prometheus.HistogramOpts{ Buckets: []float64{1000000, 100000000, 500000000, 1000000000}, }) -var validationError = promauto.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "validation_errors_total", - Help: "Number of files with validation errors.", -}) - -// ValidationError increments the validation error counter. -func ValidationError() { - validationError.Inc() -} - // Size records the size of the given file. func Size(size int64) { filesize.Observe(float64(size)) } + +func Duration(d time.Duration) { + duration.Observe(float64(d)) +} diff --git a/internal/warc/testdata/corrupt.warc.gz b/internal/warc/testdata/corrupt.warc.gz deleted file mode 100644 index 5dabe60..0000000 --- a/internal/warc/testdata/corrupt.warc.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5c13fd08626a7564c16058fabbaa3785317268e9ee3f40eac94cc9858e51864b -size 55 diff --git a/internal/warc/testdata/invalid.warc.gz b/internal/warc/testdata/invalid.warc.gz deleted file mode 100644 index ac47116..0000000 --- a/internal/warc/testdata/invalid.warc.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2fffc1398c7712c34afda2f6a4dea5072a48357954176d775499ac31cd6cdbc4 -size 3649 diff --git a/internal/warc/testdata/valid.warc.gz b/internal/warc/testdata/valid.warc.gz deleted file mode 100644 index f3a4350..0000000 --- a/internal/warc/testdata/valid.warc.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d3f830f757a3733d56de5b76b5a3ccabb5de4cfdfed2559205b9c719ee6cbdb -size 1779 diff --git a/internal/warc/validate.go b/internal/warc/validate.go deleted file mode 100644 index 70d5b52..0000000 --- a/internal/warc/validate.go +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2023 National Library of Norway. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package warc - -import ( - "errors" - "io" - - "github.com/nlnwa/gowarc" -) - -func IsValid(file, tmpDir string) (bool, error) { - wf, err := gowarc.NewWarcFileReader(file, 0, - gowarc.WithBufferTmpDir(tmpDir), - ) - if err != nil { - return false, err - } - defer wf.Close() - - for { - wr, _, validation, err := wf.Next() - if errors.Is(err, io.EOF) { - return validation.Valid(), nil - } - if err != nil { - if wr != nil { - defer wr.Close() - } - return false, nil - } - func() { - defer wr.Close() - err = wr.ValidateDigest(validation) - if err != nil { - *validation = append(*validation, err) - } - }() - // stop processing if we have found an invalid record - if !validation.Valid() { - return false, nil - } - } -} diff --git a/internal/warc/validate_test.go b/internal/warc/validate_test.go deleted file mode 100644 index 8229d40..0000000 --- a/internal/warc/validate_test.go +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2023 National Library of Norway. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package warc - -import ( - "testing" -) - -const ( - validWarcFile = "testdata/valid.warc.gz" - invalidWarcFile = "testdata/invalid.warc.gz" - corruptedWarcFile = "testdata/corrupt.warc.gz" -) - -func TestMain(m *testing.M) { - m.Run() -} - -func TestIsValid(t *testing.T) { - tests := []struct { - file string - isValid bool - }{ - { - file: validWarcFile, - isValid: true, - }, - { - file: invalidWarcFile, - isValid: false, - }, - { - file: corruptedWarcFile, - isValid: false, - }, - } - - for _, test := range tests { - t.Run(test.file, func(t *testing.T) { - isValid, err := IsValid(test.file, "") - if err != nil { - t.Errorf("expected no error, got: %v", err) - } - if isValid != test.isValid { - t.Errorf("expected %v, got: %v", test.isValid, isValid) - } - }) - } -} diff --git a/main.go b/main.go index 72931a1..dfba7a1 100644 --- a/main.go +++ b/main.go @@ -18,42 +18,59 @@ package main import ( "context" - "flag" + "errors" "fmt" "log/slog" "net/http" "os" "os/signal" "runtime" + "strings" "syscall" "time" "github.com/nlnwa/fai/internal/fai" + "github.com/nlnwa/fai/internal/metrics" + "github.com/nlnwa/fai/internal/queue" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/spf13/pflag" + "github.com/spf13/viper" ) func main() { - sourceDir := "" - validTargetDir := "" - invalidTargetDir := "" - tmpDir := "" - - concurrency := runtime.NumCPU() - sleep := 5 * time.Second - pattern := "*" - metricsPort := 8081 - - flag.StringVar(&sourceDir, "source-dir", sourceDir, "path to source directory") - flag.StringVar(&validTargetDir, "valid-target-dir", validTargetDir, "path to target directory where valid files and their corresponding checksum files will be moved to") - flag.StringVar(&invalidTargetDir, "invalid-target-dir", invalidTargetDir, "path to target directory where invalid files and their corresponding checksum files will be moved to") - flag.StringVar(&tmpDir, "tmp-dir", tmpDir, "path to directory where temporary buffer files will be stored") - flag.IntVar(&concurrency, "concurrency", concurrency, "number of concurrent files processed") - flag.DurationVar(&sleep, "sleep", sleep, "sleep duration between directory listings, set to 0 to only do a single run") - flag.StringVar(&pattern, "pattern", pattern, "glob pattern used to match filenames in source directory") - flag.IntVar(&metricsPort, "metrics-port", metricsPort, "port to expose metrics on") - flag.Parse() - - logger := slog.Default() + pflag.String("dir", "", "path to source directory") + pflag.String("pattern", "*.warc.gz", "glob pattern used to match filenames in source directory") + pflag.Int("concurrency", runtime.NumCPU(), "number of files processed concurrently") + pflag.Duration("sleep", 5*time.Second, "sleep duration between directory listings, set to 0 to only do a single pass") + pflag.String("s3-address", "localhost:9000", "s3 endpoint (address:port)") + pflag.String("s3-bucket-name", "", "name of bucket to upload files to") + pflag.String("s3-access-key-id", "", "access key ID") + pflag.String("s3-secret-access-key", "", "secret access key") + pflag.String("s3-token", "", "token to use for s3 authentication (optional)") + pflag.Int("metrics-port", 8081, "port to expose metrics on") + pflag.Parse() + + logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)) + + err := viper.BindPFlags(pflag.CommandLine) + if err != nil { + logger.Error("Failed to bind flags", "error", err) + os.Exit(1) + } + + viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) + viper.AutomaticEnv() + + sourceDir := viper.GetString("dir") + concurrency := viper.GetInt("concurrency") + sleep := viper.GetDuration("sleep") + globPattern := viper.GetString("pattern") + metricsAddr := fmt.Sprintf(":%d", viper.GetInt("metrics-port")) + s3bucketName := viper.GetString("s3-bucket-name") + s3address := viper.GetString("s3-address") + s3accessKeyID := viper.GetString("s3-access-key-id") + s3secretAccessKey := viper.GetString("s3-secret-access-key") + s3token := viper.GetString("s3-token") ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) defer cancel() @@ -61,26 +78,62 @@ func main() { go func() { defer cancel() http.Handle("/metrics", promhttp.Handler()) - err := http.ListenAndServe(fmt.Sprintf(":%d", metricsPort), nil) + err := http.ListenAndServe(metricsAddr, nil) + if errors.Is(err, http.ErrServerClosed) { + return + } if err != nil { - logger.Error("Failed to start metrics server", "error", err) + logger.Error("Metrics server failed", "error", err) } }() + s3uploader, err := fai.NewS3Uploader( + fai.WithS3Address(s3address), + fai.WithS3AccessKeyID(s3accessKeyID), + fai.WithS3SecretAccessKey(s3secretAccessKey), + fai.WithS3Token(s3token), + fai.WithS3BucketName(s3bucketName), + ) + if err != nil { + logger.Error("Failed to create S3 uploader", "error", err) + os.Exit(1) + } + logger.Info("S3 uploader", "bucket", s3bucketName, "address", s3address) + + worker := func(filePath string) { + start := time.Now() + info, err := s3uploader.Upload(ctx, filePath) + if err != nil { + logger.Error("Failed to upload file", "file", filePath, "error", err) + return + } + metrics.Duration(time.Since(start)) + metrics.Size(info.Size) + + logger.Info("Uploaded file", "key", info.Key, "size", info.Size, "etag", info.ETag) + + err = os.Remove(filePath) + if err != nil { + logger.Error("Failed to remove file", "file", filePath, "error", err) + } + } + + queue := queue.NewWorkQueue(worker, concurrency) + defer queue.CloseAndWait() + logger.Info("Work queue", "concurrency", concurrency) + f, err := fai.New( fai.WithSourceDir(sourceDir), - fai.WithValidTargetDir(validTargetDir), - fai.WithInvalidTargetDir(invalidTargetDir), - fai.WithTmpDir(tmpDir), - fai.WithConcurrency(concurrency), fai.WithSleep(sleep), - fai.WithGlobPattern(pattern), - fai.WithLogger(logger), + fai.WithGlobPattern(globPattern), + fai.WithInspector(queue.Add), ) if err != nil { logger.Error("", "error", err) os.Exit(1) } + logger.Info("Starting FAI", "sourceDir", sourceDir, "globPattern", globPattern, "sleep", sleep.String()) + f.Run(ctx) }