Jelajahi Sumber

parallel upload

apkipa 4 hari lalu
induk · commit 890fed19aa
5 file diubah dengan 79 tambahan dan 31 penghapusan
  1. +1 −0   config/application.yaml
  2. +1 −0   go.mod
  3. +2 −0   go.sum
  4. +70 −31 main.go
  5. +5 −0   util/util.go

+ 1 - 0
config/application.yaml

@@ -19,3 +19,4 @@ main:
   uploadRetryMaxTimes: 20
   failedRetryDelaySeconds: 5
   notifyToUploadDelaySeconds: 1
+  parallelUploadThreadsCount: 2

+ 1 - 0
go.mod

@@ -7,6 +7,7 @@ replace stck/stck-nsq-msg => ./stck-nsq-msg
 require (
 	github.com/ClickHouse/ch-go v0.61.5
 	github.com/ClickHouse/clickhouse-go/v2 v2.30.0
+	github.com/Jeffail/tunny v0.1.4
 	github.com/fsnotify/fsnotify v1.7.0
 	github.com/klauspost/compress v1.17.11
 	github.com/minio/minio-go/v7 v7.0.80

+ 2 - 0
go.sum

@@ -4,6 +4,8 @@ github.com/ClickHouse/ch-go v0.61.5 h1:zwR8QbYI0tsMiEcze/uIMK+Tz1D3XZXLdNrlaOpeE
 github.com/ClickHouse/ch-go v0.61.5/go.mod h1:s1LJW/F/LcFs5HJnuogFMta50kKDO0lf9zzfrbl0RQg=
 github.com/ClickHouse/clickhouse-go/v2 v2.30.0 h1:AG4D/hW39qa58+JHQIFOSnxyL46H6h2lrmGGk17dhFo=
 github.com/ClickHouse/clickhouse-go/v2 v2.30.0/go.mod h1:i9ZQAojcayW3RsdCb3YR+n+wC2h65eJsZCscZ1Z1wyo=
+github.com/Jeffail/tunny v0.1.4 h1:chtpdz+nUtaYQeCKlNBg6GycFF/kGVHOr6A3cmzTJXs=
+github.com/Jeffail/tunny v0.1.4/go.mod h1:P8xAx4XQl0xsuhjX1DtfaMDCSuavzdb2rwbd0lk+fvo=
 github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
 github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

+ 70 - 31
main.go

@@ -26,6 +26,7 @@ import (
 	"github.com/ClickHouse/ch-go/proto"
 	"github.com/ClickHouse/clickhouse-go/v2"
 	"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
+	"github.com/Jeffail/tunny"
 	"github.com/fsnotify/fsnotify"
 	"github.com/minio/minio-go/v7"
 	"github.com/minio/minio-go/v7/pkg/credentials"
@@ -60,6 +61,7 @@ type AppInitConfig struct {
 		UploadRetryMaxTimes        int `mapstructure:"uploadRetryMaxTimes"`
 		FailedRetryDelaySeconds    int `mapstructure:"failedRetryDelaySeconds"`
 		NotifyToUploadDelaySeconds int `mapstructure:"notifyToUploadDelaySeconds"`
+		ParallelUploadThreadsCount int `mapstructure:"parallelUploadThreadsCount"`
 	} `mapstructure:"main"`
 }
 
@@ -110,6 +112,7 @@ func initLoadConfig() {
 	viper.SetDefault("main.uploadRetryMaxTimes", 20)
 	viper.SetDefault("main.failedRetryDelaySeconds", 5)
 	viper.SetDefault("main.notifyToUploadDelaySeconds", 1)
+	viper.SetDefault("main.parallelUploadThreadsCount", 2)
 	viper.SetConfigFile("./config/application.yaml")
 	viper.WatchConfig()
 	viper.OnConfigChange(func(e fsnotify.Event) {
@@ -651,6 +654,57 @@ func upload_one_stream(app AppCtx, streamName string) (fullyUploaded bool, err e
 	logger.Tracef("Listing minio objects in `%s`, bucket `%s`", streamObjPath, appInitCfg.Minio.Bucket)
 	// hasSomething := false
 	hasMetadata := false
+
+	var wg sync.WaitGroup
+	var mt sync.Mutex
+	pool := tunny.NewFunc(appInitCfg.Main.ParallelUploadThreadsCount, func(payload interface{}) interface{} {
+		packed := payload.(util.Pair[string, PartUploadArgs])
+		partName := packed.First
+		partInfo := packed.Second
+
+		objStat := StreamObjectUploadStatistics{
+			StartTime: time.Now(),
+			PartName:  partName,
+		}
+
+		logger.Infof("Uploading part `%s` (total %d) of stream `%s`, total_points=%d",
+			partInfo.PartName, partInfo.StreamInfo.PartsCount,
+			partInfo.StreamName, partInfo.StreamInfo.TotalPoints)
+
+		err := upload_one_part(app, partInfo.StreamInfo, partInfo.StreamName, partInfo.PartName)
+		if err != nil {
+			objStat.EndTime = time.Now()
+			objStat.UpState = "fail"
+			objStat.Msg = err.Error()
+
+			logger.Warnf("Failed to upload part `%s` of stream `%s` (took %v): %v", partInfo.PartName, partInfo.StreamName,
+				objStat.EndTime.Sub(objStat.StartTime), err)
+			fullyUploaded = false
+		} else {
+			// Mark the part as uploaded
+			//err = app.db.Create(&PartUploadRecord{StreamName: partInfo.StreamName, PartName: partInfo.PartName}).Error
+			part := PartUploadRecord{StreamName: partInfo.StreamName, PartName: partName}
+			err = app.db.Where(part).FirstOrCreate(&PartUploadRecord{}).Error
+			if err != nil {
+				logger.Warnf("Failed to mark part `%s` of stream `%s` as uploaded: %v", partInfo.PartName, partInfo.StreamName, err)
+			}
+
+			objStat.EndTime = time.Now()
+			objStat.UpState = "ok"
+
+			logger.Infof("Uploaded part `%s` of stream `%s`, took %v", partInfo.PartName, streamName,
+				objStat.EndTime.Sub(objStat.StartTime))
+		}
+
+		func() {
+			mt.Lock()
+			defer mt.Unlock()
+			streamStats.Objects[partInfo.PartName] = objStat
+		}()
+
+		return nil
+	})
+
 	for objInfo := range app.minioClient.ListObjects(context.Background(), appInitCfg.Minio.Bucket, options) {
 		if objInfo.Err != nil {
 			return false, objInfo.Err
@@ -682,7 +736,11 @@ func upload_one_stream(app AppCtx, streamName string) (fullyUploaded bool, err e
 		if part_already_uploaded(app, streamName, partName) {
 			objStat.EndTime = time.Now()
 			objStat.UpState = "repeated"
-			streamStats.Objects[objInfo.Key] = objStat
+			func() {
+				mt.Lock()
+				defer mt.Unlock()
+				streamStats.Objects[objInfo.Key] = objStat
+			}()
 
 			logger.Infof("Part `%s` of stream `%s` is already uploaded", objInfo.Key, streamName)
 			continue
@@ -694,38 +752,16 @@ func upload_one_stream(app AppCtx, streamName string) (fullyUploaded bool, err e
 
 		// Do the parts upload
 		partInfo := PartUploadArgs{StreamInfo: streamInfo, StreamName: streamName, PartName: objInfo.Key}
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			pool.Process(util.Pair[string, PartUploadArgs]{First: partName, Second: partInfo})
+		}()
+	}
 
-		logger.Infof("Uploading part `%s` (total %d) of stream `%s`, total_points=%d",
-			partInfo.PartName, partInfo.StreamInfo.PartsCount,
-			partInfo.StreamName, partInfo.StreamInfo.TotalPoints)
-
-		err := upload_one_part(app, partInfo.StreamInfo, partInfo.StreamName, partInfo.PartName)
-		if err != nil {
-			objStat.EndTime = time.Now()
-			objStat.UpState = "fail"
-			objStat.Msg = err.Error()
-
-			logger.Warnf("Failed to upload part `%s` of stream `%s` (took %v): %v", partInfo.PartName, partInfo.StreamName,
-				objStat.EndTime.Sub(objStat.StartTime), err)
-			fullyUploaded = false
-		} else {
-			// Mark the part as uploaded
-			//err = app.db.Create(&PartUploadRecord{StreamName: partInfo.StreamName, PartName: partInfo.PartName}).Error
-			part := PartUploadRecord{StreamName: partInfo.StreamName, PartName: partName}
-			err = app.db.Where(part).FirstOrCreate(&PartUploadRecord{}).Error
-			if err != nil {
-				logger.Warnf("Failed to mark part `%s` of stream `%s` as uploaded: %v", partInfo.PartName, partInfo.StreamName, err)
-			}
-
-			objStat.EndTime = time.Now()
-			objStat.UpState = "ok"
-
-			logger.Infof("Uploaded part `%s` of stream `%s`, took %v", objInfo.Key, streamName,
-				objStat.EndTime.Sub(objStat.StartTime))
-		}
+	wg.Wait()
+	pool.Close()
 
-		streamStats.Objects[objInfo.Key] = objStat
-	}
 	if !hasMetadata {
 		logger.Warnf("Stream `%s` has no metadata file, will retry later", streamName)
 		fullyUploaded = false
@@ -853,6 +889,9 @@ func upload_one_part(app AppCtx, streamInfo *StreamMetadata, streamName string,
 			err = c.Do(ctx, ch.Query{
 				Body:  sql,
 				Input: input,
+				// Settings: []ch.Setting{
+				// 	ch.SettingInt("max_insert_threads", 2),
+				// },
 			})
 			if err != nil {
 				return fmt.Errorf("failed to insert part into stck: %w", err)

+ 5 - 0
util/util.go

@@ -112,3 +112,8 @@ func ExtractNumberFromString(filename string) (int64, error) {
 func ToSqlLiteral(s string) string {
 	return "'" + strings.ReplaceAll(s, "'", "''") + "'"
 }
+
+type Pair[T, U any] struct {
+    First  T
+    Second U
+}