From ccd49e4d4b002460a3c391bd3dd795bc24c9a513 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 28 Sep 2023 17:01:34 +0200 Subject: [PATCH 1/6] Run tests to verify backend tag validation behavior --- internal/bundle/tags_test.go | 160 +++++++++++++++++++++++++++++++ internal/testutil/cloud.go | 50 ++++++++++ internal/testutil/requirement.go | 19 ++++ 3 files changed, 229 insertions(+) create mode 100644 internal/bundle/tags_test.go create mode 100644 internal/testutil/cloud.go create mode 100644 internal/testutil/requirement.go diff --git a/internal/bundle/tags_test.go b/internal/bundle/tags_test.go new file mode 100644 index 00000000000..8e70db28dc7 --- /dev/null +++ b/internal/bundle/tags_test.go @@ -0,0 +1,160 @@ +package bundle + +import ( + "context" + "strings" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/require" +) + +func testTags(t *testing.T, tags map[string]string) error { + var nodeTypeId string + switch testutil.GetCloud(t) { + case testutil.AWS: + nodeTypeId = "i3.xlarge" + case testutil.Azure: + nodeTypeId = "Standard_DS4_v2" + case testutil.GCP: + nodeTypeId = "n1-standard-4" + } + + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + ctx := context.Background() + resp, err := w.Jobs.Create(ctx, jobs.CreateJob{ + Name: internal.RandomName("test-tags-"), + Tasks: []jobs.Task{ + { + TaskKey: "test", + NewCluster: &compute.ClusterSpec{ + SparkVersion: "13.3.x-scala2.12", + NumWorkers: 1, + NodeTypeId: nodeTypeId, + }, + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "/doesnt_exist.py", + }, + }, + }, + Tags: tags, + }) + + if resp != nil { + t.Cleanup(func() { + w.Jobs.DeleteByJobId(ctx, resp.JobId) + }) + } + + return err +} + +func testTagKey(t *testing.T, key string) error { + return testTags(t, map[string]string{ + key: "value", + }) +} + +func testTagValue(t *testing.T, value string) error { + return testTags(t, map[string]string{ + "key": value, + }) +} + +func TestAccTagKeyAWS(t *testing.T) { + testutil.Require(t, testutil.AWS) + t.Parallel() + + t.Run("invalid", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "café") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, `The key must match the regular expression ^[\d \w\+\-=\.:\/@]*$.`) + }) + + t.Run("empty", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, ` the minimal length is 1, and the maximum length is 127.`) + }) +} + +func TestAccTagValueAWS(t *testing.T) { + testutil.Require(t, testutil.AWS) + t.Parallel() + + err := testTagValue(t, "café") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, `The value must match the regular expression ^[\d \w\+\-=\.:/@]*$.`) +} + +func TestAccTagKeyAzure(t *testing.T) { + testutil.Require(t, testutil.Azure) + t.Parallel() + + t.Run("invalid", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "café?") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, `The key must match the regular expression ^[^<>\*&%;\\\/\+\?]*$.`) + }) + + t.Run("empty", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, ` the minimal length is 1, and the maximum length is 512.`) + }) +} + +func TestAccTagValueAzure(t *testing.T) { + testutil.Require(t, testutil.Azure) + t.Parallel() + + // Azure puts no constraings on tag values. + err := testTagValue(t, "café?") + require.NoError(t, err) +} + +func TestAccTagKeyGCP(t *testing.T) { + testutil.Require(t, testutil.GCP) + t.Parallel() + + t.Run("invalid", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "café?") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, `The key must match the regular expression ^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$.`) + }) + + t.Run("empty", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, ` the minimal length is 1, and the maximum length is 63.`) + }) +} + +func TestAccTagValueGCP(t *testing.T) { + testutil.Require(t, testutil.GCP) + t.Parallel() + + err := testTagValue(t, "café?") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, `The value must match the regular expression ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$.`) +} diff --git a/internal/testutil/cloud.go b/internal/testutil/cloud.go new file mode 100644 index 00000000000..e8f741fae86 --- /dev/null +++ b/internal/testutil/cloud.go @@ -0,0 +1,50 @@ +package testutil + +import ( + "testing" + + "github.com/databricks/cli/internal" +) + +type Cloud int + +const ( + AWS Cloud = iota + Azure + GCP +) + +// Implement [Requirement]. +func (c Cloud) Verify(t *testing.T) { + if c != GetCloud(t) { + t.Skipf("Skipping %s-specific test", c) + } +} + +func (c Cloud) String() string { + switch c { + case AWS: + return "AWS" + case Azure: + return "Azure" + case GCP: + return "GCP" + default: + return "unknown" + } +} + +func GetCloud(t *testing.T) Cloud { + env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV") + switch env { + case "aws": + return AWS + case "azure": + return Azure + case "gcp": + return GCP + default: + t.Fatalf("Unknown cloud environment: %s", env) + } + return -1 +} diff --git a/internal/testutil/requirement.go b/internal/testutil/requirement.go new file mode 100644 index 00000000000..53855e0b52f --- /dev/null +++ b/internal/testutil/requirement.go @@ -0,0 +1,19 @@ +package testutil + +import ( + "testing" +) + +// Requirement is the interface for test requirements. +type Requirement interface { + Verify(t *testing.T) +} + +// Require should be called at the beginning of a test to ensure that all +// requirements are met before running the test. +// If any requirement is not met, the test will be skipped. +func Require(t *testing.T, requirements ...Requirement) { + for _, r := range requirements { + r.Verify(t) + } +} From 7ea1f3530acf5899b3e41b4792a5b41238d89409 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 28 Sep 2023 17:04:14 +0200 Subject: [PATCH 2/6] Test unicode tag keys --- internal/bundle/tags_test.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/internal/bundle/tags_test.go b/internal/bundle/tags_test.go index 8e70db28dc7..220dd8afbbb 100644 --- a/internal/bundle/tags_test.go +++ b/internal/bundle/tags_test.go @@ -79,6 +79,14 @@ func TestAccTagKeyAWS(t *testing.T) { require.Contains(t, msg, `The key must match the regular expression ^[\d \w\+\-=\.:\/@]*$.`) }) + t.Run("unicode", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "🍎") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, ` contains non-latin1 characters.`) + }) + t.Run("empty", func(t *testing.T) { t.Parallel() err := testTagKey(t, "") @@ -110,6 +118,14 @@ func TestAccTagKeyAzure(t *testing.T) { require.Contains(t, msg, `The key must match the regular expression ^[^<>\*&%;\\\/\+\?]*$.`) }) + t.Run("unicode", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "🍎") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, ` contains non-latin1 characters.`) + }) + t.Run("empty", func(t *testing.T) { t.Parallel() err := testTagKey(t, "") @@ -140,6 +156,14 @@ func TestAccTagKeyGCP(t *testing.T) { require.Contains(t, msg, `The key must match the regular expression ^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$.`) }) + t.Run("unicode", func(t *testing.T) { + t.Parallel() + err := testTagKey(t, "🍎") + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, ` contains non-latin1 characters.`) + }) + t.Run("empty", func(t *testing.T) { t.Parallel() err := testTagKey(t, "") From 6f1ed032e4d46728fa41b36610dbfa13f0183ba4 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 28 Sep 2023 17:24:22 +0200 Subject: [PATCH 3/6] Reduce repetition --- internal/bundle/tags_test.go | 230 +++++++++++++++++++++++------------ 1 file changed, 153 insertions(+), 77 deletions(-) diff --git a/internal/bundle/tags_test.go b/internal/bundle/tags_test.go index 220dd8afbbb..443779e7aac 100644 --- a/internal/bundle/tags_test.go +++ b/internal/bundle/tags_test.go @@ -67,32 +67,59 @@ func testTagValue(t *testing.T, value string) error { }) } +type tagTestCase struct { + name string + value string + fn func(t *testing.T, value string) error + err string +} + +func runTagTestCases(t *testing.T, cases []tagTestCase) { + for i := range cases { + tc := cases[i] + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + err := tc.fn(t, tc.value) + if tc.err == "" { + require.NoError(t, err) + } else { + require.Error(t, err) + msg := strings.ReplaceAll(err.Error(), "\n", " ") + require.Contains(t, msg, tc.err) + } + }) + } +} + func TestAccTagKeyAWS(t *testing.T) { testutil.Require(t, testutil.AWS) t.Parallel() - t.Run("invalid", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "café") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, `The key must match the regular expression ^[\d \w\+\-=\.:\/@]*$.`) - }) - - t.Run("unicode", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "🍎") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, ` contains non-latin1 characters.`) - }) - - t.Run("empty", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, ` the minimal length is 1, and the maximum length is 127.`) + runTagTestCases(t, []tagTestCase{ + { + name: "invalid", + value: "café", + fn: testTagKey, + err: ` The key must match the regular expression ^[\d \w\+\-=\.:\/@]*$.`, + }, + { + name: "unicode", + value: "🍎", + fn: testTagKey, + err: ` contains non-latin1 characters.`, + }, + { + name: "empty", + value: "", + fn: testTagKey, + err: ` the minimal length is 1, and the maximum length is 127.`, + }, + { + name: "valid", + value: "cafe", + fn: testTagKey, + err: ``, + }, }) } @@ -100,38 +127,57 @@ func TestAccTagValueAWS(t *testing.T) { testutil.Require(t, testutil.AWS) t.Parallel() - err := testTagValue(t, "café") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, `The value must match the regular expression ^[\d \w\+\-=\.:/@]*$.`) + runTagTestCases(t, []tagTestCase{ + { + name: "invalid", + value: "café", + fn: testTagValue, + err: ` The value must match the regular expression ^[\d \w\+\-=\.:/@]*$.`, + }, + { + name: "unicode", + value: "🍎", + fn: testTagValue, + err: ` contains non-latin1 characters.`, + }, + { + name: "valid", + value: "cafe", + fn: testTagValue, + err: ``, + }, + }) } func TestAccTagKeyAzure(t *testing.T) { testutil.Require(t, testutil.Azure) t.Parallel() - t.Run("invalid", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "café?") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, `The key must match the regular expression ^[^<>\*&%;\\\/\+\?]*$.`) - }) - - t.Run("unicode", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "🍎") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, ` contains non-latin1 characters.`) - }) - - t.Run("empty", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, ` the minimal length is 1, and the maximum length is 512.`) + runTagTestCases(t, []tagTestCase{ + { + name: "invalid", + value: "café?", + fn: testTagKey, + err: ` The key must match the regular expression ^[^<>\*&%;\\\/\+\?]*$.`, + }, + { + name: "unicode", + value: "🍎", + fn: testTagKey, + err: ` contains non-latin1 characters.`, + }, + { + name: "empty", + value: "", + fn: testTagKey, + err: ` the minimal length is 1, and the maximum length is 512.`, + }, + { + name: "valid", + value: "cafe", + fn: testTagKey, + err: ``, + }, }) } @@ -139,37 +185,51 @@ func TestAccTagValueAzure(t *testing.T) { testutil.Require(t, testutil.Azure) t.Parallel() - // Azure puts no constraings on tag values. - err := testTagValue(t, "café?") - require.NoError(t, err) + runTagTestCases(t, []tagTestCase{ + { + name: "unicode", + value: "🍎", + fn: testTagValue, + err: ` contains non-latin1 characters.`, + }, + { + name: "valid", + value: "cafe", + fn: testTagValue, + err: ``, + }, + }) } func TestAccTagKeyGCP(t *testing.T) { testutil.Require(t, testutil.GCP) t.Parallel() - t.Run("invalid", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "café?") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, `The key must match the regular expression ^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$.`) - }) - - t.Run("unicode", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "🍎") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, ` contains non-latin1 characters.`) - }) - - t.Run("empty", func(t *testing.T) { - t.Parallel() - err := testTagKey(t, "") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, ` the minimal length is 1, and the maximum length is 63.`) + runTagTestCases(t, []tagTestCase{ + { + name: "invalid", + value: "café?", + fn: testTagKey, + err: ` The key must match the regular expression ^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$.`, + }, + { + name: "unicode", + value: "🍎", + fn: testTagKey, + err: ` contains non-latin1 characters.`, + }, + { + name: "empty", + value: "", + fn: testTagKey, + err: ` the minimal length is 1, and the maximum length is 63.`, + }, + { + name: "valid", + value: "cafe", + fn: testTagKey, + err: ``, + }, }) } @@ -177,8 +237,24 @@ func TestAccTagValueGCP(t *testing.T) { testutil.Require(t, testutil.GCP) t.Parallel() - err := testTagValue(t, "café?") - require.Error(t, err) - msg := strings.ReplaceAll(err.Error(), "\n", " ") - require.Contains(t, msg, `The value must match the regular expression ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$.`) + runTagTestCases(t, []tagTestCase{ + { + name: "invalid", + value: "café", + fn: testTagValue, + err: ` The value must match the regular expression ^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$.`, + }, + { + name: "unicode", + value: "🍎", + fn: testTagValue, + err: ` contains non-latin1 characters.`, + }, + { + name: "valid", + value: "cafe", + fn: testTagValue, + err: ``, + }, + }) } From 3fb5671843dc072d0b9e007fbf0926b9d60ff7d7 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 28 Sep 2023 17:30:53 +0200 Subject: [PATCH 4/6] Keep internal/testutil dependency-free --- internal/testutil/cloud.go | 4 +--- internal/testutil/env.go | 9 +++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/testutil/cloud.go b/internal/testutil/cloud.go index e8f741fae86..50bbf67f254 100644 --- a/internal/testutil/cloud.go +++ b/internal/testutil/cloud.go @@ -2,8 +2,6 @@ package testutil import ( "testing" - - "github.com/databricks/cli/internal" ) type Cloud int @@ -35,7 +33,7 @@ func (c Cloud) String() string { } func GetCloud(t *testing.T) Cloud { - env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV") + env := GetEnvOrSkipTest(t, "CLOUD_ENV") switch env { case "aws": return AWS diff --git a/internal/testutil/env.go b/internal/testutil/env.go index 11a610189a0..39201c5b456 100644 --- a/internal/testutil/env.go +++ b/internal/testutil/env.go @@ -35,3 +35,12 @@ func CleanupEnvironment(t *testing.T) { t.Setenv("USERPROFILE", pwd) } } + +// GetEnvOrSkipTest proceeds with test only with that env variable +func GetEnvOrSkipTest(t *testing.T, name string) string { + value := os.Getenv(name) + if value == "" { + t.Skipf("Environment variable %s is missing", name) + } + return value +} From b584ecc08416e0429578f04acc29a0a1f0c3be63 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 28 Sep 2023 21:47:27 +0200 Subject: [PATCH 5/6] Move test to top level package --- internal/{bundle => }/tags_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) rename internal/{bundle => }/tags_test.go (98%) diff --git a/internal/bundle/tags_test.go b/internal/tags_test.go similarity index 98% rename from internal/bundle/tags_test.go rename to internal/tags_test.go index 443779e7aac..2dd3759acbd 100644 --- a/internal/bundle/tags_test.go +++ b/internal/tags_test.go @@ -1,11 +1,10 @@ -package bundle +package internal import ( "context" "strings" "testing" - "github.com/databricks/cli/internal" "github.com/databricks/cli/internal/testutil" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/compute" @@ -29,7 +28,7 @@ func testTags(t *testing.T, tags map[string]string) error { ctx := context.Background() resp, err := w.Jobs.Create(ctx, jobs.CreateJob{ - Name: internal.RandomName("test-tags-"), + Name: RandomName("test-tags-"), Tasks: []jobs.Task{ { TaskKey: "test", From 9b48b0e1f1d649d7e539387b8a31685ca6658afc Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 28 Sep 2023 22:13:16 +0200 Subject: [PATCH 6/6] Library to validate and normalize cloud specific tags --- libs/tags/aws.go | 36 +++++++++++++++ libs/tags/aws_test.go | 49 +++++++++++++++++++++ libs/tags/azure.go | 25 +++++++++++ libs/tags/azure_test.go | 34 +++++++++++++++ libs/tags/cloud.go | 32 ++++++++++++++ libs/tags/cloud_test.go | 32 ++++++++++++++ libs/tags/gcp.go | 63 +++++++++++++++++++++++++++ libs/tags/gcp_test.go | 65 +++++++++++++++++++++++++++ libs/tags/latin.go | 11 +++++ libs/tags/latin_test.go | 16 +++++++ libs/tags/tag.go | 57 ++++++++++++++++++++++++ libs/tags/transform.go | 87 +++++++++++++++++++++++++++++++++++++ libs/tags/transform_test.go | 25 +++++++++++ 13 files changed, 532 insertions(+) create mode 100644 libs/tags/aws.go create mode 100644 libs/tags/aws_test.go create mode 100644 libs/tags/azure.go create mode 100644 libs/tags/azure_test.go create mode 100644 libs/tags/cloud.go create mode 100644 libs/tags/cloud_test.go create mode 100644 libs/tags/gcp.go create mode 100644 libs/tags/gcp_test.go create mode 100644 libs/tags/latin.go create mode 100644 libs/tags/latin_test.go create mode 100644 libs/tags/tag.go create mode 100644 libs/tags/transform.go create mode 100644 libs/tags/transform_test.go diff --git a/libs/tags/aws.go b/libs/tags/aws.go new file mode 100644 index 00000000000..44d69c683e8 --- /dev/null +++ b/libs/tags/aws.go @@ -0,0 +1,36 @@ +package tags + +import ( + "regexp" + "unicode" + + "golang.org/x/text/unicode/rangetable" +) + +// The union of all characters allowed in AWS tags. +// This must be used only after filtering out non-Latin1 characters, +// because the [unicode] classes include non-Latin1 characters. +var awsChars = rangetable.Merge( + unicode.Digit, + unicode.Space, + unicode.Letter, + rangetable.New('+', '-', '=', '.', ':', '/', '@'), +) + +var awsTag = &tag{ + keyLength: 127, + keyPattern: regexp.MustCompile(`^[\d \w\+\-=\.:\/@]*$`), + keyNormalize: chain( + normalizeMarks(), + replaceNotIn(latin1, '_'), + replaceNotIn(awsChars, '_'), + ), + + valueLength: 255, + valuePattern: regexp.MustCompile(`^[\d \w\+\-=\.:/@]*$`), + valueNormalize: chain( + normalizeMarks(), + replaceNotIn(latin1, '_'), + replaceNotIn(awsChars, '_'), + ), +} diff --git a/libs/tags/aws_test.go b/libs/tags/aws_test.go new file mode 100644 index 00000000000..2a2bb7e7bd7 --- /dev/null +++ b/libs/tags/aws_test.go @@ -0,0 +1,49 @@ +package tags + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAwsNormalizeKey(t *testing.T) { + assert.Equal(t, "1 a b c", awsTag.NormalizeKey("1 a b c")) + assert.Equal(t, "+-=.:/@__", awsTag.NormalizeKey("+-=.:/@?)")) + assert.Equal(t, "test", awsTag.NormalizeKey("test")) + + // Remove marks; unicode becomes underscore. + assert.Equal(t, "cafe _", awsTag.NormalizeKey("café 🍎")) + + // Replace forbidden characters with underscore. + assert.Equal(t, "cafe __", awsTag.NormalizeKey("café 🍎?")) +} + +func TestAwsNormalizeValue(t *testing.T) { + assert.Equal(t, "1 a b c", awsTag.NormalizeValue("1 a b c")) + assert.Equal(t, "+-=.:/@__", awsTag.NormalizeValue("+-=.:/@?)")) + assert.Equal(t, "test", awsTag.NormalizeValue("test")) + + // Remove marks; unicode becomes underscore. + assert.Equal(t, "cafe _", awsTag.NormalizeValue("café 🍎")) + + // Replace forbidden characters with underscore. + assert.Equal(t, "cafe __", awsTag.NormalizeValue("café 🍎?")) +} + +func TestAwsValidateKey(t *testing.T) { + assert.ErrorContains(t, awsTag.ValidateKey(""), "not be empty") + assert.ErrorContains(t, awsTag.ValidateKey(strings.Repeat("a", 512)), "length") + assert.ErrorContains(t, awsTag.ValidateKey("café 🍎"), "latin") + assert.ErrorContains(t, awsTag.ValidateKey("????"), "pattern") + assert.NoError(t, awsTag.ValidateKey(strings.Repeat("a", 127))) + assert.NoError(t, awsTag.ValidateKey(awsTag.NormalizeKey("café 🍎"))) +} + +func TestAwsValidateValue(t *testing.T) { + assert.ErrorContains(t, awsTag.ValidateValue(strings.Repeat("a", 512)), "length") + assert.ErrorContains(t, awsTag.ValidateValue("café 🍎"), "latin1") + assert.ErrorContains(t, awsTag.ValidateValue("????"), "pattern") + assert.NoError(t, awsTag.ValidateValue(strings.Repeat("a", 127))) + assert.NoError(t, awsTag.ValidateValue(awsTag.NormalizeValue("café 🍎"))) +} diff --git a/libs/tags/azure.go b/libs/tags/azure.go new file mode 100644 index 00000000000..e98a5eb2d41 --- /dev/null +++ b/libs/tags/azure.go @@ -0,0 +1,25 @@ +package tags + +import ( + "regexp" + + "golang.org/x/text/unicode/rangetable" +) + +// All characters that may not be used in Azure tag keys. +var azureForbiddenChars = rangetable.New('<', '>', '*', '&', '%', ';', '\\', '/', '+', '?') + +var azureTag = &tag{ + keyLength: 512, + keyPattern: regexp.MustCompile(`^[^<>\*&%;\\\/\+\?]*$`), + keyNormalize: chain( + replaceNotIn(latin1, '_'), + replaceIn(azureForbiddenChars, '_'), + ), + + valueLength: 256, + valuePattern: regexp.MustCompile(`^.*$`), + valueNormalize: chain( + replaceNotIn(latin1, '_'), + ), +} diff --git a/libs/tags/azure_test.go b/libs/tags/azure_test.go new file mode 100644 index 00000000000..1deb5d6e6f3 --- /dev/null +++ b/libs/tags/azure_test.go @@ -0,0 +1,34 @@ +package tags + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAzureNormalizeKey(t *testing.T) { + assert.Equal(t, "test", azureTag.NormalizeKey("test")) + assert.Equal(t, "café __", azureTag.NormalizeKey("café 🍎?")) +} + +func TestAzureNormalizeValue(t *testing.T) { + assert.Equal(t, "test", azureTag.NormalizeValue("test")) + assert.Equal(t, "café _?", azureTag.NormalizeValue("café 🍎?")) +} + +func TestAzureValidateKey(t *testing.T) { + assert.ErrorContains(t, azureTag.ValidateKey(""), "not be empty") + assert.ErrorContains(t, azureTag.ValidateKey(strings.Repeat("a", 513)), "length") + assert.ErrorContains(t, azureTag.ValidateKey("café 🍎"), "latin") + assert.ErrorContains(t, azureTag.ValidateKey("????"), "pattern") + assert.NoError(t, azureTag.ValidateKey(strings.Repeat("a", 127))) + assert.NoError(t, azureTag.ValidateKey(azureTag.NormalizeKey("café 🍎"))) +} + +func TestAzureValidateValue(t *testing.T) { + assert.ErrorContains(t, azureTag.ValidateValue(strings.Repeat("a", 513)), "length") + assert.ErrorContains(t, azureTag.ValidateValue("café 🍎"), "latin") + assert.NoError(t, azureTag.ValidateValue(strings.Repeat("a", 127))) + assert.NoError(t, azureTag.ValidateValue(azureTag.NormalizeValue("café 🍎"))) +} diff --git a/libs/tags/cloud.go b/libs/tags/cloud.go new file mode 100644 index 00000000000..f423efa58bd --- /dev/null +++ b/libs/tags/cloud.go @@ -0,0 +1,32 @@ +package tags + +import "github.com/databricks/databricks-sdk-go/config" + +type Cloud interface { + // ValidateKey checks if a tag key can be used with the cloud provider. + ValidateKey(key string) error + + // ValidateValue checks if a tag value can be used with the cloud provider. + ValidateValue(value string) error + + // NormalizeKey normalizes a tag key for the cloud provider. + NormalizeKey(key string) string + + // NormalizeValue normalizes a tag value for the cloud provider. + NormalizeValue(value string) string +} + +func ForCloud(cfg *config.Config) Cloud { + var t *tag + switch { + case cfg.IsAws(): + t = awsTag + case cfg.IsAzure(): + t = azureTag + case cfg.IsGcp(): + t = gcpTag + default: + panic("unknown cloud provider") + } + return t +} diff --git a/libs/tags/cloud_test.go b/libs/tags/cloud_test.go new file mode 100644 index 00000000000..a1d04d88fee --- /dev/null +++ b/libs/tags/cloud_test.go @@ -0,0 +1,32 @@ +package tags + +import ( + "testing" + + "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" +) + +func TestForCloudAws(t *testing.T) { + c := &config.Config{ + Host: "https://dbc-XXXXXXXX-YYYY.cloud.databricks.com/", + } + + assert.Equal(t, awsTag, ForCloud(c)) +} + +func TestForCloudAzure(t *testing.T) { + c := &config.Config{ + Host: "https://adb-xxx.y.azuredatabricks.net/", + } + + assert.Equal(t, azureTag, ForCloud(c)) +} + +func TestForCloudGcp(t *testing.T) { + c := &config.Config{ + Host: "https://123.4.gcp.databricks.com/", + } + + assert.Equal(t, gcpTag, ForCloud(c)) +} diff --git a/libs/tags/gcp.go b/libs/tags/gcp.go new file mode 100644 index 00000000000..f30ca4cae00 --- /dev/null +++ b/libs/tags/gcp.go @@ -0,0 +1,63 @@ +package tags + +import ( + "regexp" + "unicode" +) + +// Tag keys and values on GCP are limited to 63 characters and must match the +// regular expression `^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$`. +// For normalization, we define one table for the outer characters and +// one table for the inner characters. The outer table is used to trim +// leading and trailing characters, and the inner table is used to +// replace invalid characters with underscores. + +var gcpOuter = &unicode.RangeTable{ + R16: []unicode.Range16{ + // 0-9 + {0x0030, 0x0039, 1}, + // A-Z + {0x0041, 0x005A, 1}, + // a-z + {0x0061, 0x007A, 1}, + }, + LatinOffset: 3, +} + +var gcpInner = &unicode.RangeTable{ + R16: []unicode.Range16{ + // Hyphen-minus (dash) + {0x002D, 0x002D, 1}, + // Full stop (period) + {0x002E, 0x002E, 1}, + // 0-9 + {0x0030, 0x0039, 1}, + // A-Z + {0x0041, 0x005A, 1}, + // Low line (underscore) + {0x005F, 0x005F, 1}, + // a-z + {0x0061, 0x007A, 1}, + }, + LatinOffset: 6, +} + +var gcpTag = &tag{ + keyLength: 63, + keyPattern: regexp.MustCompile(`^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$`), + keyNormalize: chain( + normalizeMarks(), + replaceNotIn(latin1, '_'), + replaceNotIn(gcpInner, '_'), + trimIfNotIn(gcpOuter), + ), + + valueLength: 63, + valuePattern: regexp.MustCompile(`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`), + valueNormalize: chain( + normalizeMarks(), + replaceNotIn(latin1, '_'), + replaceNotIn(gcpInner, '_'), + trimIfNotIn(gcpOuter), + ), +} diff --git a/libs/tags/gcp_test.go b/libs/tags/gcp_test.go new file mode 100644 index 00000000000..89f4fd8e6be --- /dev/null +++ b/libs/tags/gcp_test.go @@ -0,0 +1,65 @@ +package tags + +import ( + "strings" + "testing" + "unicode" + + "github.com/stretchr/testify/assert" +) + +func TestGcpOuter(t *testing.T) { + assert.True(t, unicode.In('A', gcpOuter)) + assert.True(t, unicode.In('Z', gcpOuter)) + assert.True(t, unicode.In('a', gcpOuter)) + assert.True(t, unicode.In('z', gcpOuter)) + assert.True(t, unicode.In('0', gcpOuter)) + assert.True(t, unicode.In('9', gcpOuter)) + assert.False(t, unicode.In('-', gcpOuter)) + assert.False(t, unicode.In('.', gcpOuter)) + assert.False(t, unicode.In('_', gcpOuter)) + assert.False(t, unicode.In('!', gcpOuter)) +} + +func TestGcpInner(t *testing.T) { + assert.True(t, unicode.In('A', gcpInner)) + assert.True(t, unicode.In('Z', gcpInner)) + assert.True(t, unicode.In('a', gcpInner)) + assert.True(t, unicode.In('z', gcpInner)) + assert.True(t, unicode.In('0', gcpInner)) + assert.True(t, unicode.In('9', gcpInner)) + assert.True(t, unicode.In('-', gcpInner)) + assert.True(t, unicode.In('.', gcpInner)) + assert.True(t, unicode.In('_', gcpInner)) + assert.False(t, unicode.In('!', gcpInner)) +} + +func TestGcpNormalizeKey(t *testing.T) { + assert.Equal(t, "test", gcpTag.NormalizeKey("test")) + assert.Equal(t, "cafe", gcpTag.NormalizeKey("café 🍎?")) + assert.Equal(t, "cafe_foo", gcpTag.NormalizeKey("__café_foo__")) + +} + +func TestGcpNormalizeValue(t *testing.T) { + assert.Equal(t, "test", gcpTag.NormalizeValue("test")) + assert.Equal(t, "cafe", gcpTag.NormalizeValue("café 🍎?")) + assert.Equal(t, "cafe_foo", gcpTag.NormalizeValue("__café_foo__")) +} + +func TestGcpValidateKey(t *testing.T) { + assert.ErrorContains(t, gcpTag.ValidateKey(""), "not be empty") + assert.ErrorContains(t, gcpTag.ValidateKey(strings.Repeat("a", 64)), "length") + assert.ErrorContains(t, gcpTag.ValidateKey("café 🍎"), "latin") + assert.ErrorContains(t, gcpTag.ValidateKey("????"), "pattern") + assert.NoError(t, gcpTag.ValidateKey(strings.Repeat("a", 32))) + assert.NoError(t, gcpTag.ValidateKey(gcpTag.NormalizeKey("café 🍎"))) +} + +func TestGcpValidateValue(t *testing.T) { + assert.ErrorContains(t, gcpTag.ValidateValue(strings.Repeat("a", 64)), "length") + assert.ErrorContains(t, gcpTag.ValidateValue("café 🍎"), "latin") + assert.ErrorContains(t, gcpTag.ValidateValue("????"), "pattern") + assert.NoError(t, gcpTag.ValidateValue(strings.Repeat("a", 32))) + assert.NoError(t, gcpTag.ValidateValue(gcpTag.NormalizeValue("café 🍎"))) +} diff --git a/libs/tags/latin.go b/libs/tags/latin.go new file mode 100644 index 00000000000..df9ad403e7e --- /dev/null +++ b/libs/tags/latin.go @@ -0,0 +1,11 @@ +package tags + +import "unicode" + +// Range table for all characters in the Latin1 character set. +var latin1 = &unicode.RangeTable{ + R16: []unicode.Range16{ + {0x0000, 0x00ff, 1}, + }, + LatinOffset: 1, +} diff --git a/libs/tags/latin_test.go b/libs/tags/latin_test.go new file mode 100644 index 00000000000..c3234a44357 --- /dev/null +++ b/libs/tags/latin_test.go @@ -0,0 +1,16 @@ +package tags + +import ( + "testing" + "unicode" + + "github.com/stretchr/testify/assert" +) + +func TestLatinTable(t *testing.T) { + assert.True(t, unicode.In('\u0000', latin1)) + assert.True(t, unicode.In('A', latin1)) + assert.True(t, unicode.In('Z', latin1)) + assert.True(t, unicode.In('\u00ff', latin1)) + assert.False(t, unicode.In('\u0100', latin1)) +} diff --git a/libs/tags/tag.go b/libs/tags/tag.go new file mode 100644 index 00000000000..4e9b329ca23 --- /dev/null +++ b/libs/tags/tag.go @@ -0,0 +1,57 @@ +package tags + +import ( + "fmt" + "regexp" + "strings" + "unicode" +) + +// The tag type holds the validation and normalization rules for +// a cloud provider's resource tags as applied by Databricks. +type tag struct { + keyLength int + keyPattern *regexp.Regexp + keyNormalize transformer + + valueLength int + valuePattern *regexp.Regexp + valueNormalize transformer +} + +func (t *tag) ValidateKey(s string) error { + if len(s) == 0 { + return fmt.Errorf("key must not be empty") + } + if len(s) > t.keyLength { + return fmt.Errorf("key length %d exceeds maximum of %d", len(s), t.keyLength) + } + if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(latin1, r) }) { + return fmt.Errorf("key contains non-latin1 characters") + } + if !t.keyPattern.MatchString(s) { + return fmt.Errorf("key %q does not match pattern %q", s, t.keyPattern) + } + return nil +} + +func (t *tag) ValidateValue(s string) error { + if len(s) > t.valueLength { + return fmt.Errorf("value length %d exceeds maximum of %d", len(s), t.valueLength) + } + if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(latin1, r) }) { + return fmt.Errorf("value contains non-latin1 characters") + } + if !t.valuePattern.MatchString(s) { + return fmt.Errorf("value %q does not match pattern %q", s, t.valuePattern) + } + return nil +} + +func (t *tag) NormalizeKey(s string) string { + return t.keyNormalize.transform(s) +} + +func (t *tag) NormalizeValue(s string) string { + return t.valueNormalize.transform(s) +} diff --git a/libs/tags/transform.go b/libs/tags/transform.go new file mode 100644 index 00000000000..71d01b35633 --- /dev/null +++ b/libs/tags/transform.go @@ -0,0 +1,87 @@ +package tags + +import ( + "strings" + "unicode" + + "golang.org/x/text/runes" + "golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" +) + +type transformer interface { + transform(string) string +} + +type chainTransformer []transformer + +func (c chainTransformer) transform(s string) string { + for _, t := range c { + s = t.transform(s) + } + return s +} + +func chain(t ...transformer) transformer { + return chainTransformer(t) +} + +// Implement [transformer] interface with text/transform package. +type textTransformer struct { + transform.Transformer +} + +func (t textTransformer) transform(s string) string { + s, _, _ = transform.String(t, s) + return s +} + +func normalizeMarks() transformer { + // Decompose unicode characters, then remove all non-spacing marks, then recompose. + // This turns 'é' into 'e' and 'ü' into 'u'. + return textTransformer{ + transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC), + } +} + +// Replaces characters in the given set with replacement. +type replaceTransformer struct { + set runes.Set + replacement rune +} + +func (t replaceTransformer) transform(s string) string { + return strings.Map(func(r rune) rune { + if t.set.Contains(r) { + return t.replacement + } + return r + }, s) +} + +func replaceIn(table *unicode.RangeTable, replacement rune) transformer { + return replaceTransformer{runes.In(table), replacement} +} + +func replaceNotIn(table *unicode.RangeTable, replacement rune) transformer { + return replaceTransformer{runes.NotIn(table), replacement} +} + +// Trims the given string of all characters in the given set. +type trimTransformer struct { + set runes.Set +} + +func (t trimTransformer) transform(s string) string { + return strings.TrimFunc(s, func(r rune) bool { + return t.set.Contains(r) + }) +} + +func trimIfIn(table *unicode.RangeTable) transformer { + return trimTransformer{runes.In(table)} +} + +func trimIfNotIn(table *unicode.RangeTable) transformer { + return trimTransformer{runes.NotIn(table)} +} diff --git a/libs/tags/transform_test.go b/libs/tags/transform_test.go new file mode 100644 index 00000000000..6481b6d9bcb --- /dev/null +++ b/libs/tags/transform_test.go @@ -0,0 +1,25 @@ +package tags + +import ( + "testing" + "unicode" + + "github.com/stretchr/testify/assert" +) + +func TestNormalizeMarks(t *testing.T) { + x := normalizeMarks() + assert.Equal(t, "cafe", x.transform("café")) + assert.Equal(t, "cafe 🍎", x.transform("café 🍎")) + assert.Equal(t, "Foo Bar", x.transform("Foo Bar")) +} + +func TestReplace(t *testing.T) { + assert.Equal(t, "___abc___", replaceIn(unicode.Digit, '_').transform("000abc999")) + assert.Equal(t, "___000___", replaceNotIn(unicode.Digit, '_').transform("abc000abc")) +} + +func TestTrim(t *testing.T) { + assert.Equal(t, "abc", trimIfIn(unicode.Digit).transform("000abc999")) + assert.Equal(t, "000", trimIfNotIn(unicode.Digit).transform("abc000abc")) +}