databricks · janniklasrose · Jun 12, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
@@ -11,6 +11,7 @@
 * Remove API enum values and types that are still in development from the `databricks-bundles` Python package; these were never accepted by the backend ([#5484](https://github.com/databricks/cli/pull/5484)).
 * direct: Fix resolving a resource reference that is used more than once within the same field ([#5558](https://github.com/databricks/cli/pull/5558)).
 * Bundle variable references now accept Unicode letters in path segments (e.g. `${var.变量}`). ([#5532](https://github.com/databricks/cli/pull/5532))
+* Ignore remote changes to `direct_access_index_spec.schema_json` on Vector Search indexes to prevent drift when the backend normalizes the schema ([#5481](https://github.com/databricks/cli/pull/5481)).
 
 ### Dependency updates
 

diff --git a/acceptance/bundle/deployment/bind/vector_search_index/databricks.yml.tmpl b/acceptance/bundle/deployment/bind/vector_search_index/databricks.yml.tmpl
@@ -12,7 +12,7 @@ resources:
       primary_key: id
       index_type: DIRECT_ACCESS
       direct_access_index_spec:
-        schema_json: '{"id":"integer","vector":"array<float>"}'
+        schema_json: '{"id":"int","vector":"array<float>"}'
         embedding_vector_columns:
           - name: vector
             embedding_dimension: 768
diff --git a/acceptance/bundle/deployment/bind/vector_search_index/output.txt b/acceptance/bundle/deployment/bind/vector_search_index/output.txt
@@ -5,7 +5,7 @@
   "endpoint_type": "STANDARD"
 }
 
->>> [CLI] vector-search-indexes create-index --json {"name":"main.default.test_vs_index_[UNIQUE_NAME]","endpoint_name":"test-vs-endpoint-[UNIQUE_NAME]","primary_key":"id","index_type":"DIRECT_ACCESS","direct_access_index_spec":{"schema_json":"{\"id\":\"integer\",\"vector\":\"array<float>\"}","embedding_vector_columns":[{"name":"vector","embedding_dimension":768}]}}
+>>> [CLI] vector-search-indexes create-index --json {"name":"main.default.test_vs_index_[UNIQUE_NAME]","endpoint_name":"test-vs-endpoint-[UNIQUE_NAME]","primary_key":"id","index_type":"DIRECT_ACCESS","direct_access_index_spec":{"schema_json":"{\"id\":\"int\",\"vector\":\"array<float>\"}","embedding_vector_columns":[{"name":"vector","embedding_dimension":768}]}}
 {
   "name": "main.default.test_vs_index_[UNIQUE_NAME]",
   "endpoint_name": "test-vs-endpoint-[UNIQUE_NAME]",

diff --git a/acceptance/bundle/deployment/bind/vector_search_index/script b/acceptance/bundle/deployment/bind/vector_search_index/script
@@ -11,7 +11,7 @@ trap cleanup EXIT
 
 trace $CLI vector-search-endpoints create-endpoint "${ENDPOINT_NAME}" STANDARD | jq '{name, endpoint_type}'
 
-trace $CLI vector-search-indexes create-index --json "{\"name\":\"${INDEX_NAME}\",\"endpoint_name\":\"${ENDPOINT_NAME}\",\"primary_key\":\"id\",\"index_type\":\"DIRECT_ACCESS\",\"direct_access_index_spec\":{\"schema_json\":\"{\\\"id\\\":\\\"integer\\\",\\\"vector\\\":\\\"array<float>\\\"}\",\"embedding_vector_columns\":[{\"name\":\"vector\",\"embedding_dimension\":768}]}}" | jq '{name, endpoint_name, index_type, primary_key}'
+trace $CLI vector-search-indexes create-index --json "{\"name\":\"${INDEX_NAME}\",\"endpoint_name\":\"${ENDPOINT_NAME}\",\"primary_key\":\"id\",\"index_type\":\"DIRECT_ACCESS\",\"direct_access_index_spec\":{\"schema_json\":\"{\\\"id\\\":\\\"int\\\",\\\"vector\\\":\\\"array<float>\\\"}\",\"embedding_vector_columns\":[{\"name\":\"vector\",\"embedding_dimension\":768}]}}" | jq '{name, endpoint_name, index_type, primary_key}'
 
 trace $CLI bundle deployment bind index1 "${INDEX_NAME}" --auto-approve
 

diff --git a/acceptance/bundle/resources/vector_search_indexes/recreate/with_endpoint/output.txt b/acceptance/bundle/resources/vector_search_indexes/recreate/with_endpoint/output.txt
@@ -69,7 +69,7 @@ Plan: 1 to add, 0 to change, 1 to delete, 1 unchanged
               "name": "vector"
             }
           ],
-          "schema_json": "{\"id\":\"integer\",\"vector\":\"array\u003cfloat\u003e\"}"
+          "schema_json": "{\"id\":\"int\",\"vector\":\"array\u003cfloat\u003e\"}"
         },
         "endpoint_name": "vs-endpoint-[UNIQUE_NAME]",
         "endpoint_uuid": "[UUID]",
@@ -82,6 +82,13 @@ Plan: 1 to add, 0 to change, 1 to delete, 1 unchanged
         }
       },
       "changes": {
+        "direct_access_index_spec.schema_json": {
+          "action": "skip",
+          "reason": "normalized_by_backend",
+          "old": "{\"id\":\"integer\",\"vector\":\"array\u003cfloat\u003e\"}",
+          "new": "{\"id\":\"integer\",\"vector\":\"array\u003cfloat\u003e\"}",
+          "remote": "{\"id\":\"int\",\"vector\":\"array\u003cfloat\u003e\"}"
+        },
         "endpoint_uuid": {
           "action": "skip",
           "reason": "state-only field",

diff --git a/acceptance/bundle/resources/vector_search_indexes/schema_normalization/databricks.yml.tmpl b/acceptance/bundle/resources/vector_search_indexes/schema_normalization/databricks.yml.tmpl
@@ -0,0 +1,22 @@
+bundle:
+  name: vs-index-schema-$UNIQUE_NAME
+
+sync:
+  paths: []
+
+resources:
+  vector_search_endpoints:
+    my_endpoint:
+      name: vs-endpoint-$UNIQUE_NAME
+      endpoint_type: STANDARD
+  vector_search_indexes:
+    my_index:
+      name: main.default.vs_index_$UNIQUE_NAME
+      endpoint_name: ${resources.vector_search_endpoints.my_endpoint.name}
+      primary_key: id
+      index_type: DIRECT_ACCESS
+      direct_access_index_spec:
+        schema_json: '{"id":"integer","count":"long","small":"short","tiny":"byte","tags":"array<integer>","score":"float","label":"string","vector":"array<float>"}'
+        embedding_vector_columns:
+          - name: vector
+            embedding_dimension: 768
diff --git a/acceptance/bundle/resources/vector_search_indexes/schema_normalization/out.test.toml b/acceptance/bundle/resources/vector_search_indexes/schema_normalization/out.test.toml
diff --git a/acceptance/bundle/resources/vector_search_indexes/schema_normalization/output.txt b/acceptance/bundle/resources/vector_search_indexes/schema_normalization/output.txt
@@ -0,0 +1,27 @@
+
+>>> [CLI] bundle deploy
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/vs-index-schema-[UNIQUE_NAME]/default/files...
+Deploying resources...
+Updating deployment state...
+Deployment complete!
+
+>>> [CLI] vector-search-indexes get-index main.default.vs_index_[UNIQUE_NAME]
+{"count":"bigint","id":"int","label":"string","score":"float","small":"smallint","tags":"array<int>","tiny":"tinyint","vector":"array<float>"}
+
+>>> [CLI] bundle plan
+Plan: 0 to add, 0 to change, 0 to delete, 2 unchanged
+
+>>> [CLI] bundle destroy --auto-approve
+The following resources will be deleted:
+  delete resources.vector_search_endpoints.my_endpoint
+  delete resources.vector_search_indexes.my_index
+
+This action will result in the deletion of the following Vector Search indexes.
+For Delta Sync indexes, the source Delta Table is preserved but the embedding pipeline is removed.
+For Direct Access indexes, all upserted vectors are permanently lost:
+  delete resources.vector_search_indexes.my_index
+
+All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/vs-index-schema-[UNIQUE_NAME]/default
+
+Deleting files...
+Destroy complete!
diff --git a/acceptance/bundle/resources/vector_search_indexes/schema_normalization/script b/acceptance/bundle/resources/vector_search_indexes/schema_normalization/script
@@ -0,0 +1,18 @@
+envsubst < databricks.yml.tmpl > databricks.yml
+
+cleanup() {
+    trace $CLI bundle destroy --auto-approve
+    rm -f out.requests.txt
+}
+trap cleanup EXIT
+
+trace $CLI bundle deploy
+
+# The backend (and the test server) rewrite the schema on create, so
+# get-index returns Spark type names and sorted keys, not the config literal.
+index_name="main.default.vs_index_${UNIQUE_NAME}"
+trace $CLI vector-search-indexes get-index "${index_name}" | jq -r '.direct_access_index_spec.schema_json'
+
+# Re-plan must be a no-op: remote changes to schema_json are ignored
+# (ignore_remote_changes), so the immutable spec does not plan a recreate.
+trace $CLI bundle plan
diff --git a/acceptance/bundle/resources/vector_search_indexes/schema_normalization/test.toml b/acceptance/bundle/resources/vector_search_indexes/schema_normalization/test.toml
@@ -0,0 +1 @@
+Cloud = false
diff --git a/bundle/direct/dresources/resources.yml b/bundle/direct/dresources/resources.yml
@@ -638,6 +638,16 @@ resources:
         reason: immutable
       - field: direct_access_index_spec
         reason: immutable
+    ignore_remote_changes:
+      # The backend rewrites schema_json on create: user-facing type names
+      # ("integer", "long", "short", "byte") are stored in Unity Catalog as
+      # Spark type names ("int", "bigint", "smallint", "tinyint") and the
+      # columns come back in sorted key order, so GET never echoes the user's
+      # literal input. Without this rule the rewrite reads as a change to the
+      # immutable direct_access_index_spec and plans a destructive recreate
+      # that drops all upserted vectors.
+      - field: direct_access_index_spec.schema_json
+        reason: normalized_by_backend
     backend_defaults:
       # The Vector Search API assigns index_subtype when the config omits it
       - field: index_subtype
diff --git a/libs/testserver/vector_search_indexes.go b/libs/testserver/vector_search_indexes.go
@@ -1,6 +1,7 @@
 package testserver
 
 import (
+	"bytes"
 	"encoding/json"
 	"fmt"
 	"net/http"
@@ -70,6 +71,14 @@ func (s *FakeWorkspace) VectorSearchIndexCreate(req Request) Response {
 		indexSubtype = vectorsearch.IndexSubtypeHybrid
 	}
 
+	// The backend rewrites schema_json on create: user-facing type names are
+	// stored as Spark type names (e.g. "integer" -> "int") and the columns are
+	// returned in sorted key order rather than the user's original order.
+	// Mirror that here so the create -> get round-trip matches the real API.
+	if createReq.DirectAccessIndexSpec != nil {
+		createReq.DirectAccessIndexSpec.SchemaJson = normalizeSchemaJSON(createReq.DirectAccessIndexSpec.SchemaJson)
+	}
+
 	index := fakeVectorSearchIndex{
 		VectorIndex: vectorsearch.VectorIndex{
 			Creator:               s.CurrentUser().UserName,
@@ -110,6 +119,58 @@ func isValidIndexName(name string) bool {
 	return true
 }
 
+// normalizeSchemaJSON rewrites a schema_json document the way the backend
+// stores it: user-facing column type names are folded to Spark type names and
+// the columns are re-serialized in sorted key order (encoding/json sorts map
+// keys, matching the backend). Returns the input unchanged when it is empty
+// or does not decode as a {"column":"type"} JSON object.
+func normalizeSchemaJSON(schemaJSON string) string {
+	if schemaJSON == "" {
+		return schemaJSON
+	}
+	var schema map[string]string
+	if err := json.Unmarshal([]byte(schemaJSON), &schema); err != nil {
+		return schemaJSON
+	}
+	for column, columnType := range schema {
+		schema[column] = normalizeColumnType(columnType)
+	}
+	// Disable HTML escaping so array<...> keeps its angle brackets verbatim
+	// rather than being rewritten to \u003c / \u003e.
+	var buf bytes.Buffer
+	enc := json.NewEncoder(&buf)
+	enc.SetEscapeHTML(false)
+	if err := enc.Encode(schema); err != nil {
+		return schemaJSON
+	}
+	return strings.TrimRight(buf.String(), "\n") // Encode appends a newline; drop it.
+}
+
+// normalizeColumnType maps the user-facing column type names the Vector
+// Search API accepts ("integer", "long", "short", "byte") to the Spark type
+// names Unity Catalog stores and GET returns, recursing into array<...>
+// element types. Any other spelling ("float", "string", an "array<" with no
+// closing ">", ...) passes through unchanged.
+func normalizeColumnType(columnType string) string {
+	if inner, ok := strings.CutPrefix(columnType, "array<"); ok {
+		if elem, ok := strings.CutSuffix(inner, ">"); ok {
+			return "array<" + normalizeColumnType(elem) + ">"
+		}
+	}
+	switch columnType {
+	case "integer":
+		return "int"
+	case "long":
+		return "bigint"
+	case "short":
+		return "smallint"
+	case "byte":
+		return "tinyint"
+	default:
+		return columnType
+	}
+}
+
 // remapDeltaSyncSpec converts a request spec to a response spec.
 func remapDeltaSyncSpec(req *vectorsearch.DeltaSyncVectorIndexSpecRequest) *vectorsearch.DeltaSyncVectorIndexSpecResponse {
 	if req == nil {

diff --git a/libs/testserver/vector_search_indexes_test.go b/libs/testserver/vector_search_indexes_test.go
@@ -0,0 +1,51 @@
+package testserver
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNormalizeSchemaJSON(t *testing.T) {
+	tests := []struct {
+		name string // subtest name passed to t.Run
+		in   string // schema_json handed to normalizeSchemaJSON
+		want string // exact string normalizeSchemaJSON must return
+	}{
+		{
+			name: "user-facing type stored as Spark type name",
+			in:   `{"id":"integer","vector":"array<float>"}`,
+			want: `{"id":"int","vector":"array<float>"}`,
+		},
+		{
+			name: "all integer-family names",
+			in:   `{"a":"long","b":"short","c":"byte"}`,
+			want: `{"a":"bigint","b":"smallint","c":"tinyint"}`,
+		},
+		{
+			name: "array element type is mapped",
+			in:   `{"tags":"array<integer>"}`,
+			want: `{"tags":"array<int>"}`,
+		},
+		{
+			name: "matching spellings pass through and keys are sorted",
+			in:   `{"y":"float","x":"string","z":"int"}`,
+			want: `{"x":"string","y":"float","z":"int"}`,
+		},
+		{
+			name: "empty input",
+			in:   "",
+			want: "",
+		},
+		{
+			name: "non-object input is returned unchanged",
+			in:   "not json",
+			want: "not json",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.want, normalizeSchemaJSON(tt.in))
+		})
+	}
+}