diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index f789869b3e7..b4f8ab0d887 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -7,6 +7,7 @@ ### Dependency updates ### CLI +* Add `databricks psql` command to connect to Lakebase with a single command ([#3128](https://github.com/databricks/cli/pull/3128)) ### Bundles diff --git a/acceptance/cmd/psql/completions/out.test.toml b/acceptance/cmd/psql/completions/out.test.toml new file mode 100644 index 00000000000..8f3575be7b5 --- /dev/null +++ b/acceptance/cmd/psql/completions/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] diff --git a/acceptance/cmd/psql/completions/output.txt b/acceptance/cmd/psql/completions/output.txt new file mode 100644 index 00000000000..94241d4a31b --- /dev/null +++ b/acceptance/cmd/psql/completions/output.txt @@ -0,0 +1,6 @@ + +=== Command should show instances names in autocomplete: +my-database +another-database +:4 +Completion ended with directive: ShellCompDirectiveNoFileComp diff --git a/acceptance/cmd/psql/completions/script b/acceptance/cmd/psql/completions/script new file mode 100644 index 00000000000..b521c171224 --- /dev/null +++ b/acceptance/cmd/psql/completions/script @@ -0,0 +1,2 @@ +title "Command should show instances names in autocomplete:\n" +$CLI __complete psql "" diff --git a/acceptance/cmd/psql/completions/test.toml b/acceptance/cmd/psql/completions/test.toml new file mode 100644 index 00000000000..a01fd713ea1 --- /dev/null +++ b/acceptance/cmd/psql/completions/test.toml @@ -0,0 +1,10 @@ +[[Server]] +Pattern = "GET /api/2.0/database/instances" +Response.Body = ''' +{ + "database_instances": [ + {"name": "my-database"}, + {"name": "another-database"} + ] +} +''' diff --git a/acceptance/cmd/psql/not-available/out.test.toml b/acceptance/cmd/psql/not-available/out.test.toml new file mode 100644 index 00000000000..8f3575be7b5 --- /dev/null +++ 
b/acceptance/cmd/psql/not-available/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] diff --git a/acceptance/cmd/psql/not-available/output.txt b/acceptance/cmd/psql/not-available/output.txt new file mode 100644 index 00000000000..81431542da7 --- /dev/null +++ b/acceptance/cmd/psql/not-available/output.txt @@ -0,0 +1,44 @@ + +=== Command should exit if the instance is not available +>>> musterr [CLI] psql database-starting +Connecting to Databricks Database Instance database-starting ... +Postgres version: 14 +Database instance status: STARTING +Please retry when the instance becomes available +Error: database instance is not ready for accepting connections + +Exit code (musterr): 1 + +>>> musterr [CLI] psql database-updating +Connecting to Databricks Database Instance database-updating ... +Postgres version: 14 +Database instance status: UPDATING +Please retry when the instance becomes available +Error: database instance is not ready for accepting connections + +Exit code (musterr): 1 + +>>> musterr [CLI] psql database-stopped +Connecting to Databricks Database Instance database-stopped ... +Postgres version: 14 +Database instance status: STOPPED +Error: database instance is not ready for accepting connections + +Exit code (musterr): 1 + +>>> musterr [CLI] psql database-failing-over +Connecting to Databricks Database Instance database-failing-over ... +Postgres version: 14 +Database instance status: FAILING_OVER +Please retry when the instance becomes available +Error: database instance is not ready for accepting connections + +Exit code (musterr): 1 + +>>> musterr [CLI] psql database-deleting +Connecting to Databricks Database Instance database-deleting ... 
+Postgres version: 14 +Database instance status: DELETING +Error: database instance is not ready for accepting connections + +Exit code (musterr): 1 diff --git a/acceptance/cmd/psql/not-available/script b/acceptance/cmd/psql/not-available/script new file mode 100644 index 00000000000..2a45492ab77 --- /dev/null +++ b/acceptance/cmd/psql/not-available/script @@ -0,0 +1,6 @@ +title "Command should exit if the instance is not available" +trace musterr $CLI psql database-starting +trace musterr $CLI psql database-updating +trace musterr $CLI psql database-stopped +trace musterr $CLI psql database-failing-over +trace musterr $CLI psql database-deleting diff --git a/acceptance/cmd/psql/not-available/test.toml b/acceptance/cmd/psql/not-available/test.toml new file mode 100644 index 00000000000..3308ca8a751 --- /dev/null +++ b/acceptance/cmd/psql/not-available/test.toml @@ -0,0 +1,44 @@ +[[Server]] +Pattern = "GET /api/2.0/database/instances/database-starting" +Response.Body = ''' +{ + "state": "STARTING", + "pg_version": "14" +} +''' + +[[Server]] +Pattern = "GET /api/2.0/database/instances/database-updating" +Response.Body = ''' +{ + "state": "UPDATING", + "pg_version": "14" +} +''' + +[[Server]] +Pattern = "GET /api/2.0/database/instances/database-stopped" +Response.Body = ''' +{ + "state": "STOPPED", + "pg_version": "14" +} +''' + +[[Server]] +Pattern = "GET /api/2.0/database/instances/database-failing-over" +Response.Body = ''' +{ + "state": "FAILING_OVER", + "pg_version": "14" +} +''' + +[[Server]] +Pattern = "GET /api/2.0/database/instances/database-deleting" +Response.Body = ''' +{ + "state": "DELETING", + "pg_version": "14" +} +''' diff --git a/acceptance/cmd/psql/simple/echo-arguments.sh b/acceptance/cmd/psql/simple/echo-arguments.sh new file mode 100755 index 00000000000..29d0539e190 --- /dev/null +++ b/acceptance/cmd/psql/simple/echo-arguments.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# +# This script prints its arguments and exits. 
+# The test script renames this script to "psql" in order to capture the arguments that the CLI passes to psql command. +# +echo "echo-arguments.sh was called with the following arguments: $@" +echo "PGPASSWORD=${PGPASSWORD}" +echo "PGSSLMODE=${PGSSLMODE}" +exit 0 diff --git a/acceptance/cmd/psql/simple/out.test.toml b/acceptance/cmd/psql/simple/out.test.toml new file mode 100644 index 00000000000..04844bff9e1 --- /dev/null +++ b/acceptance/cmd/psql/simple/out.test.toml @@ -0,0 +1,8 @@ +Local = true +Cloud = false + +[GOOS] + windows = false + +[EnvMatrix] + DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"] diff --git a/acceptance/cmd/psql/simple/output.txt b/acceptance/cmd/psql/simple/output.txt new file mode 100644 index 00000000000..9168442b1a4 --- /dev/null +++ b/acceptance/cmd/psql/simple/output.txt @@ -0,0 +1,82 @@ + +=== Test should use a fake psql command: +>>> psql --version +echo-arguments.sh was called with the following arguments: --version +PGPASSWORD= +PGSSLMODE= + +=== Command should require instance name: +>>> musterr [CLI] psql +Error: could not find any Database instances in the workspace. Please manually specify required argument: DATABASE_INSTANCE_NAME + +Exit code (musterr): 1 + +=== Command should require instance name even if extra arguments are provided: +>>> musterr [CLI] psql -- --dbname=my-database +Error: could not find any Database instances in the workspace. Please manually specify required argument: DATABASE_INSTANCE_NAME + +Exit code (musterr): 1 + +=== Command should use default db name if it is not specified: +>>> [CLI] psql my-database +Connecting to Databricks Database Instance my-database ... +Postgres version: 14 +Database instance status: AVAILABLE +Successfully fetched database credentials +Launching psql with connection to my-database.my-host.com... 
+echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 --dbname=databricks_postgres +PGPASSWORD=my-secret-token +PGSSLMODE=require + +=== Command should by-pass extra arguments to psql: +>>> [CLI] psql my-database -- -c SELECT * FROM my_table --echo-all +Connecting to Databricks Database Instance my-database ... +Postgres version: 14 +Database instance status: AVAILABLE +Successfully fetched database credentials +Launching psql with connection to my-database.my-host.com... +echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 --dbname=databricks_postgres -c SELECT * FROM my_table --echo-all +PGPASSWORD=my-secret-token +PGSSLMODE=require + +=== Command should use the db name from extra arguments when specified: +>>> [CLI] psql my-database -- --dbname=db1 +Connecting to Databricks Database Instance my-database ... +Postgres version: 14 +Database instance status: AVAILABLE +Successfully fetched database credentials +Launching psql with connection to my-database.my-host.com... +echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 --dbname=db1 +PGPASSWORD=my-secret-token +PGSSLMODE=require + +>>> [CLI] psql my-database -- -d db2 +Connecting to Databricks Database Instance my-database ... +Postgres version: 14 +Database instance status: AVAILABLE +Successfully fetched database credentials +Launching psql with connection to my-database.my-host.com... +echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --port=5432 -d db2 +PGPASSWORD=my-secret-token +PGSSLMODE=require + +=== Command should use the port from extra arguments when specified: +>>> [CLI] psql my-database -- --dbname=db1 -p 3000 +Connecting to Databricks Database Instance my-database ... 
+Postgres version: 14 +Database instance status: AVAILABLE +Successfully fetched database credentials +Launching psql with connection to my-database.my-host.com... +echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] --dbname=db1 -p 3000 +PGPASSWORD=my-secret-token +PGSSLMODE=require + +>>> [CLI] psql my-database -- -d db2 --port=3001 +Connecting to Databricks Database Instance my-database ... +Postgres version: 14 +Database instance status: AVAILABLE +Successfully fetched database credentials +Launching psql with connection to my-database.my-host.com... +echo-arguments.sh was called with the following arguments: --host=my-database.my-host.com --username=[USERNAME] -d db2 --port=3001 +PGPASSWORD=my-secret-token +PGSSLMODE=require diff --git a/acceptance/cmd/psql/simple/script b/acceptance/cmd/psql/simple/script new file mode 100644 index 00000000000..d662d354203 --- /dev/null +++ b/acceptance/cmd/psql/simple/script @@ -0,0 +1,31 @@ +mv echo-arguments.sh psql + +cleanup() { + rm psql +} +trap cleanup EXIT + +export PATH="$(pwd):$PATH" + +title "Test should use a fake psql command:" +trace psql --version + +title "Command should require instance name:" +trace musterr $CLI psql + +title "Command should require instance name even if extra arguments are provided:" +trace musterr $CLI psql -- --dbname=my-database + +title "Command should use default db name if it is not specified:" +trace $CLI psql my-database + +title "Command should by-pass extra arguments to psql:" +trace $CLI psql my-database -- -c "SELECT * FROM my_table" --echo-all + +title "Command should use the db name from extra arguments when specified:" +trace $CLI psql my-database -- --dbname=db1 +trace $CLI psql my-database -- -d db2 + +title "Command should use the port from extra arguments when specified:" +trace $CLI psql my-database -- --dbname=db1 -p 3000 +trace $CLI psql my-database -- -d db2 --port=3001 diff --git 
a/acceptance/cmd/psql/simple/test.toml b/acceptance/cmd/psql/simple/test.toml new file mode 100644 index 00000000000..ebf13980a3f --- /dev/null +++ b/acceptance/cmd/psql/simple/test.toml @@ -0,0 +1,32 @@ +# This acceptance test is disabled on Windows runners because +# the current argument capturing method does not work on windows-latest GitHub Runner. +# +# See PR #3228 for documented attempts to fix this issue: +# https://github.com/databricks/cli/pull/3228 +GOOS.windows = false + +[[Server]] +Pattern = "GET /api/2.0/database/instances/my-database" +Response.Body = ''' +{ + "state": "AVAILABLE", + "pg_version": "14", + "read_write_dns": "my-database.my-host.com" +} +''' + +[[Server]] +Pattern = "GET /api/2.0/database/instances" +Response.Body = ''' +{ + "database_instances": [] +} +''' + +[[Server]] +Pattern = "POST /api/2.0/database/credentials" +Response.Body = ''' +{ + "token": "my-secret-token" +} +''' diff --git a/acceptance/help/output.txt b/acceptance/help/output.txt index ca2a59a9d8f..e0303855f0c 100644 --- a/acceptance/help/output.txt +++ b/acceptance/help/output.txt @@ -126,6 +126,7 @@ Clean Rooms Database database Database Instances provide access to a database via REST API or direct SQL. + psql Connect to the specified Database Instance Quality Monitor v2 quality-monitor-v2 Manage data quality of UC objects (currently support schema). 
diff --git a/cmd/cmd.go b/cmd/cmd.go
index fe458294f2b..ef90ccca6ac 100644
--- a/cmd/cmd.go
+++ b/cmd/cmd.go
@@ -4,6 +4,8 @@ import (
 	"context"
 	"strings"
 
+	"github.com/databricks/cli/cmd/psql"
+
 	"github.com/databricks/cli/cmd/account"
 	"github.com/databricks/cli/cmd/api"
 	"github.com/databricks/cli/cmd/auth"
@@ -101,6 +103,7 @@ func New(ctx context.Context) *cobra.Command {
 	cli.AddCommand(api.New())
 	cli.AddCommand(auth.New())
 	cli.AddCommand(bundle.New())
+	cli.AddCommand(psql.New())
 	cli.AddCommand(configure.New())
 	cli.AddCommand(fs.New())
 	cli.AddCommand(labs.New(ctx))
diff --git a/cmd/psql/psql.go b/cmd/psql/psql.go
new file mode 100644
index 00000000000..928784a50be
--- /dev/null
+++ b/cmd/psql/psql.go
@@ -0,0 +1,121 @@
+// Package psql implements the `databricks psql` command, which launches psql
+// against a Databricks Database Instance.
+package psql
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/databricks/cli/libs/cmdctx"
+	"github.com/databricks/cli/libs/cmdio"
+	"github.com/databricks/databricks-sdk-go/service/database"
+
+	"github.com/databricks/cli/cmd/root"
+	"github.com/databricks/cli/libs/lakebase"
+	"github.com/spf13/cobra"
+)
+
+// New returns the `psql` command.
+func New() *cobra.Command {
+	return newLakebaseConnectCommand()
+}
+
+// newLakebaseConnectCommand builds the cobra command. The Database Instance
+// name comes from the single positional argument or, when none is given, from
+// an interactive drop-down; arguments after "--" are handed to
+// lakebase.Connect untouched.
+func newLakebaseConnectCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:     "psql [DATABASE_INSTANCE_NAME] [-- PSQL_ARGS...]",
+		Short:   "Connect to the specified Database Instance",
+		GroupID: "database",
+		Long: `Connect to the specified Database Instance.
+
+This command requires a psql client to be installed on your machine for the connection to work.
+
+You can pass additional arguments to psql after a double-dash (--):
+  databricks psql my-database -- -c "SELECT * FROM my_table"
+  databricks psql my-database -- --echo-all -d "my-db"
+`,
+	}
+
+	cmd.PreRunE = root.MustWorkspaceClient
+	cmd.RunE = func(cmd *cobra.Command, args []string) error {
+		ctx := cmd.Context()
+		w := cmdctx.WorkspaceClient(ctx)
+
+		// ArgsLenAtDash is negative when "--" was not used; every argument is
+		// positional in that case.
+		argsBeforeDash := cmd.ArgsLenAtDash()
+		if argsBeforeDash < 0 {
+			argsBeforeDash = len(args)
+		}
+
+		// Reject stray positionals instead of prompting and then forwarding
+		// them to psql as if they had been written after "--".
+		if argsBeforeDash > 1 {
+			return fmt.Errorf("accepts at most one DATABASE_INSTANCE_NAME argument, received %d; pass psql arguments after --", argsBeforeDash)
+		}
+
+		if argsBeforeDash == 0 {
+			promptSpinner := cmdio.Spinner(ctx)
+			promptSpinner <- "No DATABASE_INSTANCE_NAME argument specified. Loading names for Database instances drop-down."
+			instances, err := w.Database.ListDatabaseInstancesAll(ctx, database.ListDatabaseInstancesRequest{})
+			close(promptSpinner)
+			if err != nil {
+				return fmt.Errorf("failed to load names for Database instances drop-down. Please manually specify required argument: DATABASE_INSTANCE_NAME. Original error: %w", err)
+			}
+			if len(instances) == 0 {
+				return errors.New("could not find any Database instances in the workspace. Please manually specify required argument: DATABASE_INSTANCE_NAME")
+			}
+
+			names := make(map[string]string, len(instances))
+			for _, instance := range instances {
+				names[instance.Name] = instance.Name
+			}
+
+			name, err := cmdio.Select(ctx, names, "")
+			if err != nil {
+				return err
+			}
+
+			// args holds only the post-"--" arguments here, so the chosen
+			// name becomes args[0].
+			args = append([]string{name}, args...)
+		}
+
+		databaseInstanceName := args[0]
+		extraArgs := args[1:]
+
+		return lakebase.Connect(ctx, databaseInstanceName, extraArgs...)
+	}
+
+	cmd.ValidArgsFunction = func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+		// Only the first positional argument is an instance name; later words
+		// are psql arguments, so leave them to the shell's default completion.
+		if len(args) > 0 {
+			return nil, cobra.ShellCompDirectiveDefault
+		}
+
+		err := root.MustWorkspaceClient(cmd, args)
+		if err != nil {
+			return nil, cobra.ShellCompDirectiveError
+		}
+
+		ctx := cmd.Context()
+		w := cmdctx.WorkspaceClient(ctx)
+		instances, err := w.Database.ListDatabaseInstancesAll(ctx, database.ListDatabaseInstancesRequest{})
+		if err != nil {
+			return nil, cobra.ShellCompDirectiveError
+		}
+
+		names := make([]string, 0, len(instances))
+		for _, instance := range instances {
+			names = append(names, instance.Name)
+		}
+
+		return names, cobra.ShellCompDirectiveNoFileComp
+	}
+
+	return cmd
+}
diff --git a/libs/lakebase/connect.go b/libs/lakebase/connect.go
new file mode 100644
index 00000000000..1afacfcea4a
--- /dev/null
+++ b/libs/lakebase/connect.go
@@ -0,0 +1,140 @@
+// Package lakebase provides helpers for connecting to Databricks Database
+// Instances.
+package lakebase
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/databricks/cli/libs/cmdctx"
+	"github.com/databricks/cli/libs/cmdio"
+	"github.com/databricks/cli/libs/exec"
+	"github.com/databricks/databricks-sdk-go/service/database"
+	"github.com/google/uuid"
+)
+
+// Defaults passed to psql unless the caller's extra arguments override them.
+const (
+	defaultPort     = "5432"
+	defaultDatabase = "databricks_postgres"
+)
+
+// Connect launches psql against the Database Instance named
+// databaseInstanceName.
+//
+// It fetches the current user and the instance, returns an error unless the
+// instance state is AVAILABLE, generates a database credential, and then
+// executes psql with the instance's read/write DNS name as host and the
+// current user's name as username. The credential token is handed to psql via
+// PGPASSWORD, and PGSSLMODE is set to "require". extraArgs are appended to the
+// psql command line unchanged; the default port and database name are added
+// only when extraArgs does not already set them.
+func Connect(ctx context.Context, databaseInstanceName string, extraArgs ...string) error {
+	cmdio.LogString(ctx, fmt.Sprintf("Connecting to Databricks Database Instance %s ...", databaseInstanceName))
+
+	w := cmdctx.WorkspaceClient(ctx)
+
+	// The current user's name is used as the Postgres username.
+	user, err := w.CurrentUser.Me(ctx)
+	if err != nil {
+		return fmt.Errorf("error getting current user: %w", err)
+	}
+
+	db, err := w.Database.GetDatabaseInstance(ctx, database.GetDatabaseInstanceRequest{
+		Name: databaseInstanceName,
+	})
+	if err != nil {
+		return fmt.Errorf("error getting Database Instance. Please confirm that database instance %s exists: %w", databaseInstanceName, err)
+	}
+
+	cmdio.LogString(ctx, "Postgres version: "+db.PgVersion)
+	cmdio.LogString(ctx, fmt.Sprintf("Database instance status: %s", db.State))
+
+	if db.State != database.DatabaseInstanceStateAvailable {
+		// Only these states get the retry hint; for any other state just the
+		// error below is reported.
+		if db.State == database.DatabaseInstanceStateStarting || db.State == database.DatabaseInstanceStateUpdating || db.State == database.DatabaseInstanceStateFailingOver {
+			cmdio.LogString(ctx, "Please retry when the instance becomes available")
+		}
+		return errors.New("database instance is not ready for accepting connections")
+	}
+
+	cred, err := w.Database.GenerateDatabaseCredential(ctx, database.GenerateDatabaseCredentialRequest{
+		InstanceNames: []string{databaseInstanceName},
+		RequestId:     uuid.NewString(),
+	})
+	if err != nil {
+		return fmt.Errorf("error getting database credentials: %w", err)
+	}
+	cmdio.LogString(ctx, "Successfully fetched database credentials")
+
+	// psql is started in the caller's working directory.
+	dir, err := os.Getwd()
+	if err != nil {
+		return fmt.Errorf("error getting working directory: %w", err)
+	}
+
+	args := []string{
+		"psql",
+		"--host=" + db.ReadWriteDns,
+		"--username=" + user.UserName,
+	}
+	if !hasOption(extraArgs, "-p", "--port") {
+		args = append(args, "--port="+defaultPort)
+	}
+	if !hasOption(extraArgs, "-d", "--dbname") {
+		args = append(args, "--dbname="+defaultDatabase)
+	}
+	args = append(args, extraArgs...)
+
+	// Drop any PGPASSWORD/PGSSLMODE inherited from the caller's environment so
+	// the values set here are the only definitions psql receives.
+	cmdEnv := withEnv(os.Environ(),
+		"PGPASSWORD="+cred.Token,
+		"PGSSLMODE=require",
+	)
+
+	cmdio.LogString(ctx, fmt.Sprintf("Launching psql with connection to %s...", db.ReadWriteDns))
+
+	return exec.Execv(exec.ExecvOptions{
+		Args: args,
+		Env:  cmdEnv,
+		Dir:  dir,
+	})
+}
+
+// hasOption reports whether args sets the option with the given short and long
+// spelling in any of the forms "-d value", "-dvalue", "--dbname value" or
+// "--dbname=value".
+func hasOption(args []string, short, long string) bool {
+	for _, arg := range args {
+		if arg == long || strings.HasPrefix(arg, long+"=") || strings.HasPrefix(arg, short) {
+			return true
+		}
+	}
+	return false
+}
+
+// withEnv returns base without any existing definition of the variables named
+// in overrides (each a KEY=VALUE string), followed by overrides, so no
+// variable is defined twice in the result.
+func withEnv(base []string, overrides ...string) []string {
+	keys := make(map[string]struct{}, len(overrides))
+	for _, kv := range overrides {
+		key, _, _ := strings.Cut(kv, "=")
+		keys[key] = struct{}{}
+	}
+
+	out := make([]string, 0, len(base)+len(overrides))
+	for _, kv := range base {
+		key, _, _ := strings.Cut(kv, "=")
+		if _, ok := keys[key]; ok {
+			continue
+		}
+		out = append(out, kv)
+	}
+	return append(out, overrides...)
+}