Skip to content

Commit

Permalink
fix check_parts_columns corner cases for AggregateFunction versioni…
Browse files Browse the repository at this point in the history
…ng, fix #819
  • Loading branch information
Slach committed Jan 30, 2024
1 parent 2d4e7e8 commit 5be9d48
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 51 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# v2.4.24
BUG FIXES
- fix `check_parts_columns` corner cases for AggregateFunction versioning, fix [819](https://github.com/Altinity/clickhouse-backup/issues/819)

# v2.4.23
IMPROVEMENTS
- refactoring of `restore` command to allow parallel execution of `ALTER TABLE ... ATTACH PART` and improve parallelization of CopyObject during restore.
Expand Down
83 changes: 32 additions & 51 deletions pkg/clickhouse/clickhouse.go
Original file line number Diff line number Diff line change
Expand Up @@ -1163,69 +1163,50 @@ func (ch *ClickHouse) CheckReplicationInProgress(table metadata.TableMetadata) (
return true, nil
}

var isDateWithTzRE = regexp.MustCompile(`^Date[^(]+\(.+\)`)

// CheckSystemPartsColumns check data parts types consistency https://github.com/Altinity/clickhouse-backup/issues/529#issuecomment-1554460504
func (ch *ClickHouse) CheckSystemPartsColumns(ctx context.Context, table *Table) error {
var err error
partColumnsDataTypes := make([]struct {
Column string `ch:"column"`
Types []string `ch:"uniq_types"`
}, 0)
partColumnsDataTypes := make([]ColumnDataTypes, 0)
partsColumnsSQL := "SELECT column, groupUniqArray(type) AS uniq_types " +
"FROM system.parts_columns " +
"WHERE active AND database=? AND table=? AND type NOT LIKE 'Enum%' AND type NOT LIKE 'Tuple(%' AND type NOT LIKE 'Array(Tuple(%' " +
"WHERE active AND database=? AND table=? AND type NOT LIKE 'Enum(%' AND type NOT LIKE 'Tuple(%' AND type NOT LIKE 'Array(Tuple(%' " +
"GROUP BY column HAVING length(uniq_types) > 1"
if err = ch.SelectContext(ctx, &partColumnsDataTypes, partsColumnsSQL, table.Database, table.Name); err != nil {
return err
}
isPartColumnsInconsistentDataTypes := false
if len(partColumnsDataTypes) > 0 {
for i := range partColumnsDataTypes {
isNullablePresent := false
isNotNullablePresent := false
isDatePresent := false
isDateTZPresent := false
var baseDataType string
for _, dataType := range partColumnsDataTypes[i].Types {
currentDataType := dataType
for _, compatiblePrefix := range []string{"Nullable(", "LowCardinality("} {
if strings.HasPrefix(currentDataType, compatiblePrefix) {
currentDataType = strings.TrimPrefix(currentDataType, compatiblePrefix)
currentDataType = strings.TrimSuffix(currentDataType, ")")
}
}
if strings.Contains(currentDataType, "(") {
currentDataType = currentDataType[:strings.Index(currentDataType, "(")]
}
if baseDataType == "" {
baseDataType = currentDataType
} else if baseDataType != currentDataType {
ch.Log.Errorf("`%s`.`%s` have incompatible data types %#v for \"%s\" column", table.Database, table.Name, partColumnsDataTypes[i].Types, partColumnsDataTypes[i].Column)
isPartColumnsInconsistentDataTypes = true
break
}
if strings.Contains(dataType, "Nullable") {
isNullablePresent = true
} else {
isNotNullablePresent = true
}
if strings.HasPrefix(dataType, "Date") {
isDatePresent = true
if !isDateTZPresent {
isDateTZPresent = isDateWithTzRE.MatchString(dataType)
}
}
return ch.CheckTypesConsistency(table, partColumnsDataTypes)
}

var dateWithParams = regexp.MustCompile(`^(Date[^(]+)\([^)]+\)`)
var versioningAggregateRE = regexp.MustCompile(`^[0-9]+,\s*`)

func (ch *ClickHouse) CheckTypesConsistency(table *Table, partColumnsDataTypes []ColumnDataTypes) error {
cleanType := func(dataType string) string {
for _, compatiblePrefix := range []string{"LowCardinality(", "Nullable("} {
if strings.HasPrefix(dataType, compatiblePrefix) {
dataType = strings.TrimPrefix(dataType, compatiblePrefix)
dataType = strings.TrimSuffix(dataType, ")")
}
if !isNullablePresent && isNotNullablePresent {
if isDatePresent && isDateTZPresent {
continue
}
ch.Log.Errorf("`%s`.`%s` have inconsistent data types %#v for \"%s\" column", table.Database, table.Name, partColumnsDataTypes[i].Types, partColumnsDataTypes[i].Column)
isPartColumnsInconsistentDataTypes = true
}
dataType = dateWithParams.ReplaceAllString(dataType, "$1")
return dataType
}
for i := range partColumnsDataTypes {
isAggregationPresent := false
uniqTypes := common.EmptyMap{}
for _, dataType := range partColumnsDataTypes[i].Types {
isAggregationPresent = strings.HasPrefix(dataType, "AggregateFunction(")
if isAggregationPresent {
dataType = strings.TrimPrefix(dataType, "AggregateFunction(")
dataType = strings.TrimSuffix(dataType, ")")
dataType = versioningAggregateRE.ReplaceAllString(dataType, "")
} else {
dataType = cleanType(dataType)
}
uniqTypes[dataType] = struct{}{}
}
if isPartColumnsInconsistentDataTypes {
if len(uniqTypes) > 1 {
ch.Log.Errorf("`%s`.`%s` have incompatible data types %#v for \"%s\" column", table.Database, table.Name, partColumnsDataTypes[i].Types, partColumnsDataTypes[i].Column)
return fmt.Errorf("`%s`.`%s` have inconsistent data types for active data part in system.parts_columns", table.Database, table.Name)
}
}
Expand Down
95 changes: 95 additions & 0 deletions pkg/clickhouse/clickhouse_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package clickhouse

import (
"fmt"
apexLog "github.com/apex/log"
"testing"

"github.com/stretchr/testify/assert"
)

func TestCheckTypesConsistency(t *testing.T) {
ch := ClickHouse{
Log: apexLog.WithField("logger", "clickhouse"),
}
table := &Table{
Database: "mydb",
Name: "mytable",
}
expectedErr := fmt.Errorf("`mydb`.`mytable` have inconsistent data types for active data part in system.parts_columns")

testCases := []struct {
Name string
PartColumnsData []ColumnDataTypes
ExpectedError error
}{
{
Name: "No partColumnsData",
PartColumnsData: []ColumnDataTypes{},
ExpectedError: nil,
},
{
Name: "Consistent data types",
PartColumnsData: []ColumnDataTypes{
{
Column: "col2",
Types: []string{"AggregateFunction(1, sumMap, Array(UInt16), Array(UInt64))", "AggregateFunction(sumMap, Array(UInt16), Array(UInt64))"},
},
{
Column: "col3",
Types: []string{"Nullable(Int32)", "Int32"},
},
{
Column: "col4",
Types: []string{"LowCardinality(String)", "String"},
},
{
Column: "col5",
Types: []string{"DateTime", "DateTime('Meteor/Chelyabinsk')"},
},
{
Column: "col6",
Types: []string{"LowCardinality(Nullable(String))", "String"},
},
},
ExpectedError: nil,
},
{
Name: "Inconsistent data types",
PartColumnsData: []ColumnDataTypes{
{
Column: "col1",
Types: []string{"Int32", "String"},
},
},
ExpectedError: expectedErr,
},
{
Name: "Inconsistent AggregateFunction",
PartColumnsData: []ColumnDataTypes{
{
Column: "col2",
Types: []string{"AggregateFunction(1, avg, Array(UInt16), Array(UInt64))", "AggregateFunction(sumMap, Array(UInt16), Array(UInt64))"},
},
},
ExpectedError: expectedErr,
},
{
Name: "Inconsistent Types #2",
PartColumnsData: []ColumnDataTypes{
{
Column: "col2",
Types: []string{"DateTime(6)", "Date"},
},
},
ExpectedError: expectedErr,
},
}

for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) {
err := ch.CheckTypesConsistency(table, tc.PartColumnsData)
assert.Equal(t, tc.ExpectedError, err)
})
}
}
6 changes: 6 additions & 0 deletions pkg/clickhouse/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,9 @@ type SystemBackups struct {
UncompressedSize uint64 `ch:"uncompressed_size"`
NumFiles uint64 `ch:"num_files"`
}

// ColumnDataTypes - info from system.parts_columns
type ColumnDataTypes struct {
Column string `ch:"column"`
Types []string `ch:"uniq_types"`
}

0 comments on commit 5be9d48

Please sign in to comment.