From bf00fc4b499ce4c720f31ba8085a5d03a7316004 Mon Sep 17 00:00:00 2001 From: AJ Kerrigan Date: Fri, 8 Aug 2025 14:17:23 -0400 Subject: [PATCH 1/7] chore: address some local dev papercuts - Define `ASSETDB_DATABASE_URI` in docker-compose to avoid startup errors trying to determine IAM auth from a non-existent AssetDB URI - [Hack] Duplicate the Poetry name & version keys to the top-level project in pyproject.toml, avoids some error cases installing the root package into a virtual environment - Set the active schema before running SQL commands in the `make create_database` step, so we don't get "relation not found" errors referencing `query_results` rather than `redash.query_results` --- compose.yaml | 1 + pyproject.toml | 2 ++ redash/cli/database.py | 1 + 3 files changed, 4 insertions(+) diff --git a/compose.yaml b/compose.yaml index 426a35148e..6627b9958b 100644 --- a/compose.yaml +++ b/compose.yaml @@ -13,6 +13,7 @@ x-redash-environment: &redash-environment REDASH_HOST: http://localhost:5001 REDASH_LOG_LEVEL: "INFO" REDASH_REDIS_URL: "redis://redis:6379/0" + ASSETDB_DATABASE_URI: "postgresql://postgres@postgres/postgres" REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" REDASH_RATELIMIT_ENABLED: "false" REDASH_MAIL_DEFAULT_SENDER: "redash@example.com" diff --git a/pyproject.toml b/pyproject.toml index d5e93a84ad..5050ad5145 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,7 @@ [project] requires-python = ">=3.8" +name = "redash" +version = "25.06.0-dev" [tool.black] target-version = ['py38'] diff --git a/redash/cli/database.py b/redash/cli/database.py index d4497bb189..ce9624cc36 100644 --- a/redash/cli/database.py +++ b/redash/cli/database.py @@ -79,6 +79,7 @@ def create_tables(): sqlalchemy.orm.configure_mappers() db.create_all() + db.session.execute(f"SET search_path to {settings.SQLALCHEMY_DATABASE_SCHEMA}") db.session.execute("ALTER TABLE query_results ENABLE ROW LEVEL SECURITY") db.session.execute( """ From 9200edc3faa4adbdfa2eb3bf0813d735c7cabe35 Mon Sep 17 00:00:00 2001 From: AJ Kerrigan Date: Fri, 8 Aug 2025 14:23:20 -0400 Subject: [PATCH 2/7] fix: specify db_role when fetching latest query results - Make sure that `QueryResult.get_latest()` considers `db_role` when determining the latest available query result. - Add a test to be sure that change is working as expected. - Go up a level to be sure that `run_query()` also has `db_role` context and passes it along to `get_latest()`. --- redash/handlers/query_results.py | 5 +++-- redash/models/__init__.py | 3 ++- tests/models/test_query_results.py | 11 +++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index 08a7352f03..f3efdfaeac 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -56,7 +56,7 @@ def error_response(message, http_status=400): } -def run_query(query, parameters, data_source, query_id, should_apply_auto_limit, max_age=0): +def run_query(query, parameters, data_source, query_id, should_apply_auto_limit, max_age=0, db_role=None): if not data_source: return error_messages["no_data_source"] @@ -81,7 +81,7 @@ def run_query(query, parameters, data_source, query_id, should_apply_auto_limit, if max_age == 0: query_result = None else: - query_result = models.QueryResult.get_latest(data_source, query_text, max_age) + query_result = models.QueryResult.get_latest(data_source, query_text, max_age, db_role=db_role) record_event( current_user.org, @@ -274,6 +274,7 @@ def post(self, query_id): query_id, should_apply_auto_limit, max_age, + db_role=self.current_user.db_role, ) else: if not query.parameterized.is_safe: diff --git a/redash/models/__init__.py b/redash/models/__init__.py index a6b054e187..0494e19092 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -348,7 +348,7 @@ def unused(cls, days=7): ) @classmethod - def get_latest(cls, data_source, query, max_age=0, is_hash=False): + def get_latest(cls, data_source, query, max_age=0, is_hash=False, db_role=None): if is_hash: query_hash = query else: @@ -363,6 +363,7 @@ def get_latest(cls, data_source, query, max_age=0, is_hash=False): query = cls.query.filter( cls.query_hash == query_hash, cls.data_source == data_source, + cls.db_role == db_role, ( db.func.timezone("utc", cls.retrieved_at) + datetime.timedelta(seconds=max_age) >= db.func.timezone("utc", db.func.now()) diff --git a/tests/models/test_query_results.py b/tests/models/test_query_results.py index 16ea2de3d7..85a3219596 100644 --- a/tests/models/test_query_results.py +++ b/tests/models/test_query_results.py @@ -48,6 +48,17 @@ def test_get_latest_returns_the_most_recent_result(self): self.assertEqual(found_query_result.id, qr.id) + def test_get_latest_returns_results_per_db_role(self): + before = utcnow() - datetime.timedelta(seconds=30) + qr = self.factory.create_query_result(retrieved_at=before) + limited_role_qr = self.factory.create_query_result(db_role='limited') + + default_role_latest_results = models.QueryResult.get_latest(qr.data_source, qr.query_text, 60) + limited_role_latest_results = models.QueryResult.get_latest(qr.data_source, qr.query_text, 60, db_role="limited") + + self.assertEqual(qr.id, default_role_latest_results.id) + self.assertEqual(limited_role_qr.id, limited_role_latest_results.id) + def test_get_latest_returns_the_last_cached_result_for_negative_ttl(self): yesterday = utcnow() + datetime.timedelta(days=-100) self.factory.create_query_result(retrieved_at=yesterday) From b331e01a93dcb525e16e071193c97aa3b057cc8d Mon Sep 17 00:00:00 2001 From: AJ Kerrigan Date: Tue, 12 Aug 2025 23:00:12 -0400 Subject: [PATCH 3/7] tweak serialize_query() and QueryResult.get_latest() Address review feedback and fine-tune based on local testing: - Update the `latest_query_data_id` when serializing a query response, regardless of the active user's `db_role`. - When finding the latest relevant query results, use a separate null check for `db_role`. Use the same role filtering in both branches of the `max_age` check. --- redash/models/__init__.py | 8 ++++++-- redash/serializers/__init__.py | 23 ++++++++++++----------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index 0494e19092..d38e5fefe7 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -358,12 +358,16 @@ def get_latest(cls, data_source, query, max_age=0, is_hash=False, db_role=None): max_age = settings.QUERY_RESULTS_EXPIRED_TTL if max_age == -1: - query = cls.query.filter(cls.query_hash == query_hash, cls.data_source == data_source) + query = cls.query.filter( + cls.query_hash == query_hash, + cls.data_source == data_source, + cls.db_role.is_(None) if db_role is None else (cls.db_role == db_role), + ) else: query = cls.query.filter( cls.query_hash == query_hash, cls.data_source == data_source, - cls.db_role == db_role, + cls.db_role.is_(None) if db_role is None else (cls.db_role == db_role), ( db.func.timezone("utc", cls.retrieved_at) + datetime.timedelta(seconds=max_age) >= db.func.timezone("utc", db.func.now()) diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index ae5b4fb486..238d9a4258 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -137,17 +137,18 @@ def serialize_query( if with_visualizations: d["visualizations"] = [serialize_visualization(vis, with_query=False) for vis in query.visualizations] - if getattr(current_user, "db_role", None): - # Override the latest_query_data_id for users with a db_role because - # they may not actually be able to see that one due to their db_role - # and may have one specific to them instead. - latest_result = models.QueryResult.get_latest( - data_source=query.data_source, - query=query.query_hash, - max_age=-1, - is_hash=True, - ) - d["latest_query_data_id"] = latest_result and latest_result.id or None + # Override the latest_query_data_id to use results matching the active + # user's db_role. This ensures that users with full row-level access + # get complete result sets, and users with restrictions only get result + # sets with those same restrictions applied. + latest_result = models.QueryResult.get_latest( + data_source=query.data_source, + query=query.query_hash, + max_age=-1, + is_hash=True, + db_role=getattr(current_user, "db_role", None), + ) + d["latest_query_data_id"] = latest_result and latest_result.id or None return d From 5707c35c620a045060feb8f16e38c87100c2b38f Mon Sep 17 00:00:00 2001 From: AJ Kerrigan Date: Tue, 12 Aug 2025 23:11:32 -0400 Subject: [PATCH 4/7] don't bother setting the schema in database.py using an empty db will die later anyway because it assumes rls-related roles exist and they won't on a fresh redash db. --- redash/cli/database.py | 1 - 1 file changed, 1 deletion(-) diff --git a/redash/cli/database.py b/redash/cli/database.py index ce9624cc36..d4497bb189 100644 --- a/redash/cli/database.py +++ b/redash/cli/database.py @@ -79,7 +79,6 @@ def create_tables(): sqlalchemy.orm.configure_mappers() db.create_all() - db.session.execute(f"SET search_path to {settings.SQLALCHEMY_DATABASE_SCHEMA}") db.session.execute("ALTER TABLE query_results ENABLE ROW LEVEL SECURITY") db.session.execute( """ From 37dfb02a7c1faa09047b71e52565202f41a8a22b Mon Sep 17 00:00:00 2001 From: AJ Kerrigan Date: Wed, 13 Aug 2025 00:25:23 -0400 Subject: [PATCH 5/7] address cursor findings nice robot, good robot --- redash/handlers/query_results.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index f3efdfaeac..4dd3cfd6ca 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -185,6 +185,7 @@ def post(self): query_id, should_apply_auto_limit, max_age, + db_role=getattr(self.current_user, "db_role", None), ) @@ -274,7 +275,7 @@ def post(self, query_id): query_id, should_apply_auto_limit, max_age, - db_role=self.current_user.db_role, + db_role=getattr(self.current_user, "db_role", None), ) else: if not query.parameterized.is_safe: From 9db9d3ae6df7d423cf47103aa467b1265508723f Mon Sep 17 00:00:00 2001 From: AJ Kerrigan Date: Wed, 13 Aug 2025 00:52:28 -0400 Subject: [PATCH 6/7] base query stats on the per-role latest results --- redash/serializers/__init__.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index 238d9a4258..d617cf8a1e 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -126,14 +126,6 @@ def serialize_query( else: d["last_modified_by_id"] = query.last_modified_by_id - if with_stats: - if query.latest_query_data is not None: - d["retrieved_at"] = query.retrieved_at - d["runtime"] = query.runtime - else: - d["retrieved_at"] = None - d["runtime"] = None - if with_visualizations: d["visualizations"] = [serialize_visualization(vis, with_query=False) for vis in query.visualizations] @@ -150,6 +142,10 @@ def serialize_query( ) d["latest_query_data_id"] = latest_result and latest_result.id or None + if with_stats: + d["retrieved_at"] = latest_result and latest_result.retrieved_at or None + d["runtime"] = latest_result and latest_result.runtime or None + return d From adbff557f9a878256da5314d5ac7967482a20d89 Mon Sep 17 00:00:00 2001 From: AJ Kerrigan Date: Wed, 13 Aug 2025 12:33:03 -0400 Subject: [PATCH 7/7] tradeoff: avoid N+1 during bulk query fetching - Don't override latest query result data with db_role-specific results when bulk-fetching queries (based on `with_stats`) - _Do_ override latest results when fetching an individual query The goal here is to balance API impact and result accuracy/relevance. --- redash/serializers/__init__.py | 45 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index d617cf8a1e..aac4479b6c 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -129,22 +129,37 @@ def serialize_query( if with_visualizations: d["visualizations"] = [serialize_visualization(vis, with_query=False) for vis in query.visualizations] - # Override the latest_query_data_id to use results matching the active - # user's db_role. This ensures that users with full row-level access - # get complete result sets, and users with restrictions only get result - # sets with those same restrictions applied. - latest_result = models.QueryResult.get_latest( - data_source=query.data_source, - query=query.query_hash, - max_age=-1, - is_hash=True, - db_role=getattr(current_user, "db_role", None), - ) - d["latest_query_data_id"] = latest_result and latest_result.id or None - if with_stats: - d["retrieved_at"] = latest_result and latest_result.retrieved_at or None - d["runtime"] = latest_result and latest_result.runtime or None + # If we're serializing queries with stats, we can show last run + # data even if it's not specific to the current user's db_role. + # The downside is that the latest run may come from another + # permission level, but the upside is that we don't need to + # fetch latest runs on demand for each query. + # + # This affects API paths such like: + # - /api/queries + # - /api/queries/my + # - /api/queries/favorites + d["retrieved_at"] = query.latest_query_data and query.retrieved_at or None + d["runtime"] = query.latest_query_data and query.runtime or None + else: + # When we're _not_ fetching stats, we care more about the details + # of a latest run. This affects API paths like: + # + # - /api/queries/ + # + # Override the latest_query_data_id to use results matching the active + # user's db_role. This ensures that users with full row-level access + # get complete result sets, and users with restrictions only get result + # sets with those same restrictions applied. + latest_result = models.QueryResult.get_latest( + data_source=query.data_source, + query=query.query_hash, + max_age=-1, + is_hash=True, + db_role=getattr(current_user, "db_role", None), + ) + d["latest_query_data_id"] = latest_result and latest_result.id or None return d