From 5be9d48c2ca5b21d64bdb583fe505d6c06c6ba63 Mon Sep 17 00:00:00 2001 From: Slach Date: Tue, 30 Jan 2024 12:33:16 +0400 Subject: [PATCH] fix `check_parts_columns` corner cases for AggregateFunction versioning, fix https://github.com/Altinity/clickhouse-backup/issues/819 --- ChangeLog.md | 4 ++ pkg/clickhouse/clickhouse.go | 83 +++++++++++---------------- pkg/clickhouse/clickhouse_test.go | 95 +++++++++++++++++++++++++++++++ pkg/clickhouse/structs.go | 6 ++ 4 files changed, 137 insertions(+), 51 deletions(-) create mode 100644 pkg/clickhouse/clickhouse_test.go diff --git a/ChangeLog.md b/ChangeLog.md index f95325a3..b0c5b541 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,7 @@ +# v2.4.24 +BUG FIXES +- fix `check_parts_columns` corner cases for AggregateFunction versioning, fix [819](https://github.com/Altinity/clickhouse-backup/issues/819) + # v2.4.23 IMPROVEMENTS - refactoring of `restore` command to allow parallel execution of `ALTER TABLE ... ATTACH PART` and improve parallelization of CopyObject during restore. diff --git a/pkg/clickhouse/clickhouse.go b/pkg/clickhouse/clickhouse.go index ab26f7de..4cb2a72a 100644 --- a/pkg/clickhouse/clickhouse.go +++ b/pkg/clickhouse/clickhouse.go @@ -1163,69 +1163,50 @@ func (ch *ClickHouse) CheckReplicationInProgress(table metadata.TableMetadata) ( return true, nil } -var isDateWithTzRE = regexp.MustCompile(`^Date[^(]+\(.+\)`) - // CheckSystemPartsColumns check data parts types consistency https://github.com/Altinity/clickhouse-backup/issues/529#issuecomment-1554460504 func (ch *ClickHouse) CheckSystemPartsColumns(ctx context.Context, table *Table) error { var err error - partColumnsDataTypes := make([]struct { - Column string `ch:"column"` - Types []string `ch:"uniq_types"` - }, 0) + partColumnsDataTypes := make([]ColumnDataTypes, 0) partsColumnsSQL := "SELECT column, groupUniqArray(type) AS uniq_types " + "FROM system.parts_columns " + - "WHERE active AND database=? AND table=? AND type NOT LIKE 'Enum%' AND type NOT LIKE 'Tuple(%' AND type NOT LIKE 'Array(Tuple(%' " + + "WHERE active AND database=? AND table=? AND type NOT LIKE 'Enum(%' AND type NOT LIKE 'Tuple(%' AND type NOT LIKE 'Array(Tuple(%' " + "GROUP BY column HAVING length(uniq_types) > 1" if err = ch.SelectContext(ctx, &partColumnsDataTypes, partsColumnsSQL, table.Database, table.Name); err != nil { return err } - isPartColumnsInconsistentDataTypes := false - if len(partColumnsDataTypes) > 0 { - for i := range partColumnsDataTypes { - isNullablePresent := false - isNotNullablePresent := false - isDatePresent := false - isDateTZPresent := false - var baseDataType string - for _, dataType := range partColumnsDataTypes[i].Types { - currentDataType := dataType - for _, compatiblePrefix := range []string{"Nullable(", "LowCardinality("} { - if strings.HasPrefix(currentDataType, compatiblePrefix) { - currentDataType = strings.TrimPrefix(currentDataType, compatiblePrefix) - currentDataType = strings.TrimSuffix(currentDataType, ")") - } - } - if strings.Contains(currentDataType, "(") { - currentDataType = currentDataType[:strings.Index(currentDataType, "(")] - } - if baseDataType == "" { - baseDataType = currentDataType - } else if baseDataType != currentDataType { - ch.Log.Errorf("`%s`.`%s` have incompatible data types %#v for \"%s\" column", table.Database, table.Name, partColumnsDataTypes[i].Types, partColumnsDataTypes[i].Column) - isPartColumnsInconsistentDataTypes = true - break - } - if strings.Contains(dataType, "Nullable") { - isNullablePresent = true - } else { - isNotNullablePresent = true - } - if strings.HasPrefix(dataType, "Date") { - isDatePresent = true - if !isDateTZPresent { - isDateTZPresent = isDateWithTzRE.MatchString(dataType) - } - } + return ch.CheckTypesConsistency(table, partColumnsDataTypes) +} + +var dateWithParams = regexp.MustCompile(`^(Date[^(]+)\([^)]+\)`) +var versioningAggregateRE = regexp.MustCompile(`^[0-9]+,\s*`) + +func (ch *ClickHouse) CheckTypesConsistency(table *Table, partColumnsDataTypes []ColumnDataTypes) error { + cleanType := func(dataType string) string { + for _, compatiblePrefix := range []string{"LowCardinality(", "Nullable("} { + if strings.HasPrefix(dataType, compatiblePrefix) { + dataType = strings.TrimPrefix(dataType, compatiblePrefix) + dataType = strings.TrimSuffix(dataType, ")") } - if !isNullablePresent && isNotNullablePresent { - if isDatePresent && isDateTZPresent { - continue - } - ch.Log.Errorf("`%s`.`%s` have inconsistent data types %#v for \"%s\" column", table.Database, table.Name, partColumnsDataTypes[i].Types, partColumnsDataTypes[i].Column) - isPartColumnsInconsistentDataTypes = true + } + dataType = dateWithParams.ReplaceAllString(dataType, "$1") + return dataType + } + for i := range partColumnsDataTypes { + isAggregationPresent := false + uniqTypes := common.EmptyMap{} + for _, dataType := range partColumnsDataTypes[i].Types { + isAggregationPresent = strings.HasPrefix(dataType, "AggregateFunction(") + if isAggregationPresent { + dataType = strings.TrimPrefix(dataType, "AggregateFunction(") + dataType = strings.TrimSuffix(dataType, ")") + dataType = versioningAggregateRE.ReplaceAllString(dataType, "") + } else { + dataType = cleanType(dataType) } + uniqTypes[dataType] = struct{}{} } - if isPartColumnsInconsistentDataTypes { + if len(uniqTypes) > 1 { + ch.Log.Errorf("`%s`.`%s` have incompatible data types %#v for \"%s\" column", table.Database, table.Name, partColumnsDataTypes[i].Types, partColumnsDataTypes[i].Column) return fmt.Errorf("`%s`.`%s` have inconsistent data types for active data part in system.parts_columns", table.Database, table.Name) } } diff --git a/pkg/clickhouse/clickhouse_test.go b/pkg/clickhouse/clickhouse_test.go new file mode 100644 index 00000000..34b93fac --- /dev/null +++ b/pkg/clickhouse/clickhouse_test.go @@ -0,0 +1,95 @@ +package clickhouse + +import ( + "fmt" + apexLog "github.com/apex/log" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCheckTypesConsistency(t *testing.T) { + ch := ClickHouse{ + Log: apexLog.WithField("logger", "clickhouse"), + } + table := &Table{ + Database: "mydb", + Name: "mytable", + } + expectedErr := fmt.Errorf("`mydb`.`mytable` have inconsistent data types for active data part in system.parts_columns") + + testCases := []struct { + Name string + PartColumnsData []ColumnDataTypes + ExpectedError error + }{ + { + Name: "No partColumnsData", + PartColumnsData: []ColumnDataTypes{}, + ExpectedError: nil, + }, + { + Name: "Consistent data types", + PartColumnsData: []ColumnDataTypes{ + { + Column: "col2", + Types: []string{"AggregateFunction(1, sumMap, Array(UInt16), Array(UInt64))", "AggregateFunction(sumMap, Array(UInt16), Array(UInt64))"}, + }, + { + Column: "col3", + Types: []string{"Nullable(Int32)", "Int32"}, + }, + { + Column: "col4", + Types: []string{"LowCardinality(String)", "String"}, + }, + { + Column: "col5", + Types: []string{"DateTime", "DateTime('Meteor/Chelyabinsk')"}, + }, + { + Column: "col6", + Types: []string{"LowCardinality(Nullable(String))", "String"}, + }, + }, + ExpectedError: nil, + }, + { + Name: "Inconsistent data types", + PartColumnsData: []ColumnDataTypes{ + { + Column: "col1", + Types: []string{"Int32", "String"}, + }, + }, + ExpectedError: expectedErr, + }, + { + Name: "Inconsistent AggregateFunction", + PartColumnsData: []ColumnDataTypes{ + { + Column: "col2", + Types: []string{"AggregateFunction(1, avg, Array(UInt16), Array(UInt64))", "AggregateFunction(sumMap, Array(UInt16), Array(UInt64))"}, + }, + }, + ExpectedError: expectedErr, + }, + { + Name: "Inconsistent Types #2", + PartColumnsData: []ColumnDataTypes{ + { + Column: "col2", + Types: []string{"DateTime(6)", "Date"}, + }, + }, + ExpectedError: expectedErr, + }, + } + + for _, tc := range testCases { + t.Run(tc.Name, func(t *testing.T) { + err := ch.CheckTypesConsistency(table, tc.PartColumnsData) + assert.Equal(t, tc.ExpectedError, err) + }) + } +} diff --git a/pkg/clickhouse/structs.go b/pkg/clickhouse/structs.go index dcbc7904..d865b6ae 100644 --- a/pkg/clickhouse/structs.go +++ b/pkg/clickhouse/structs.go @@ -79,3 +79,9 @@ type SystemBackups struct { UncompressedSize uint64 `ch:"uncompressed_size"` NumFiles uint64 `ch:"num_files"` } + +// ColumnDataTypes - info from system.parts_columns +type ColumnDataTypes struct { + Column string `ch:"column"` + Types []string `ch:"uniq_types"` +}