diff --git a/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveManager.scala b/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveManager.scala index 260811a7008..423a5b4a584 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveManager.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveManager.scala @@ -27,6 +27,8 @@ import scala.sys.process._ import java.util.Comparator import org.apache.texera.common.config.PythonUtils import org.apache.texera.dao.SqlServer +import com.typesafe.scalalogging.LazyLogging +import org.apache.commons.lang3.SystemUtils import org.apache.texera.dao.jooq.generated.tables.daos.VirtualEnvironmentsDao import org.apache.texera.dao.jooq.generated.tables.pojos.VirtualEnvironments import org.jooq.JSONB @@ -44,7 +46,7 @@ import org.jooq.JSONB * /tmp/texera-pve/venvs/{cuid}/{pveName}/ */ -object PveManager { +object PveManager extends LazyLogging { case class PvePackageResponse( pveName: String, @@ -67,8 +69,16 @@ object PveManager { private def pveDir(cuid: Int, pveName: String): Path = cuidDir(cuid, pveName).resolve("pve") + // Resolves the Python interpreter inside a venv. POSIX puts it at + // `/bin/python`; Windows puts it at `/Scripts/python.exe`. + private def venvPython(venvDir: Path): Path = + if (SystemUtils.IS_OS_WINDOWS) + venvDir.resolve("Scripts").resolve("python.exe") + else + venvDir.resolve("bin").resolve("python") + private def pythonBinPath(cuid: Int, pveName: String): Path = - pveDir(cuid, pveName).resolve("bin").resolve("python") + venvPython(pveDir(cuid, pveName)) /* * Validates the PVE name and returns the Python binary path if it exists, @@ -103,26 +113,95 @@ object PveManager { } } - private def getSystemPath(isLocal: Boolean): Path = { - Paths.get( - if (isLocal) "amber/system-requirements-lock.txt" - else "/tmp/system-requirements-lock.txt" - ) - } + private def locateRequirementsTxt(): Option[Path] = + Seq(Paths.get("/tmp", "requirements.txt"), Paths.get("amber", "requirements.txt")) + .find(Files.exists(_)) + + // Resolves the fully-pinned system package set by installing requirements.txt + // into a throwaway venv and running `pip freeze`. + private def resolveSystemPackages(): Seq[String] = { + val requirementsPath = locateRequirementsTxt() match { + case Some(p) => p + case None => + logger.error("requirements.txt not found; system package set will be empty") + return Seq.empty + } - def getSystemPackages(isLocal: Boolean): Seq[String] = { - if (!Files.exists(getSystemPath(isLocal))) { - Seq() - } else { - Files - .readAllLines(getSystemPath(isLocal)) - .asScala - .map(_.trim) - .filter(line => line.nonEmpty && !line.startsWith("#")) - .toSeq + val tempVenv = Files.createTempDirectory("texera-system-venv-") + try { + val python = venvPython(tempVenv).toString + val createCode = + Process(Seq(PythonUtils.getPythonExecutable, "-m", "venv", tempVenv.toString)).! + if (createCode != 0) { + logger.error(s"failed to create temp venv for system-package resolution (exit=$createCode)") + return Seq.empty + } + + val installCode = Process( + Seq( + python, + "-u", + "-m", + "pip", + "install", + "--progress-bar", + "off", + "--no-input", + "-r", + requirementsPath.toString + ), + None, + pipEnv.toSeq: _* + ).! + if (installCode != 0) { + logger.error(s"failed to install requirements into temp venv (exit=$installCode)") + return Seq.empty + } + + val collected = scala.collection.mutable.ListBuffer[String]() + val freezeCode = Process(Seq(python, "-m", "pip", "freeze")).!( + ProcessLogger(line => collected += line, _ => ()) + ) + if (freezeCode != 0) { + logger.error(s"pip freeze failed (exit=$freezeCode)") + return Seq.empty + } + collected.toSeq.map(_.trim).filter(line => line.nonEmpty && !line.startsWith("#")) + } finally { + try { + val stream = Files.walk(tempVenv) + try stream + .sorted(Comparator.reverseOrder()) + .iterator() + .asScala + .foreach(Files.deleteIfExists) + finally stream.close() + } catch { + case _: Throwable => () + } } } + // Cached for the JVM lifetime. The system Python + requirements.txt don't + // change without an app restart, so resolving once is sufficient. + private lazy val systemPackages: Seq[String] = resolveSystemPackages() + + // Normalised package names ("numpy", "pandas") — used to reject user + // attempts to install or delete system packages. + private lazy val systemPackageNames: Set[String] = + systemPackages.map(_.split("==")(0).trim.toLowerCase).toSet + + // Materialised once: a file containing the frozen system requirements, + // passed as `pip install --constraint` so user installs respect system pins. + private lazy val systemConstraintFile: Path = { + val f = Files.createTempFile("texera-system-constraint-", ".txt") + Files.write(f, systemPackages.asJava) + f.toFile.deleteOnExit() + f + } + + def getSystemPackages: Seq[String] = systemPackages + private def runPipInstall( python: String, args: Seq[String], @@ -162,20 +241,15 @@ object PveManager { def createNewPve( cuid: Int, queue: BlockingQueue[String], - pveName: String, - isLocal: Boolean + pveName: String ): Unit = { queue.put(s"[PVE] Creating new PVE for cuid: $cuid with name: $pveName") - // NOTE: These paths are derived from computing-unit-master.dockerfile. - // If requirements.txt location changes, update these paths. - val requirementsPath = - if (isLocal) Paths.get("amber", "requirements.txt") - else Paths.get("/tmp", "requirements.txt") - - if (!Files.exists(requirementsPath)) { - queue.put(s"[PVE][ERR] System requirements not found") - return + val requirementsPath = locateRequirementsTxt() match { + case Some(p) => p + case None => + queue.put(s"[PVE][ERR] System requirements not found") + return } val venvDirPath = pveDir(cuid, pveName).toAbsolutePath @@ -354,8 +428,7 @@ object PveManager { packages: List[String], cuid: Int, queue: BlockingQueue[String], - pveName: String, - isLocal: Boolean + pveName: String ): Unit = { val python = pythonBinPath(cuid, pveName).toAbsolutePath.toString @@ -369,19 +442,6 @@ object PveManager { var installedPackages = readPackageFile(metadataPath).toSet - val systemPackages = - if (Files.exists(getSystemPath(isLocal))) { - Files - .readAllLines(getSystemPath(isLocal)) - .asScala - .map(_.trim) - .filter(line => line.nonEmpty && !line.startsWith("#")) - .map(line => line.split("==")(0).trim.toLowerCase) - .toSet - } else { - Set[String]() - } - packages.foreach { pkg => val trimmedPkg = pkg.trim @@ -389,7 +449,7 @@ object PveManager { val userPackageName = trimmedPkg.split("==")(0).trim.toLowerCase - if (systemPackages.contains(userPackageName)) { + if (systemPackageNames.contains(userPackageName)) { queue.put( s"[PVE][ERR] $trimmedPkg is a system package and cannot be installed or modified by the user." ) @@ -401,8 +461,8 @@ object PveManager { val code = runPipInstall( python, Seq( - "--constraint", // check against system-requirements-lock - getSystemPath(isLocal).toString, + "--constraint", // pin to the runtime-resolved system set + systemConstraintFile.toString, trimmedPkg ), queue @@ -440,35 +500,21 @@ object PveManager { def deletePackages( cuid: Int, packageName: String, - pveName: String, - isLocal: Boolean + pveName: String ): List[String] = { val python = pythonBinPath(cuid, pveName).toAbsolutePath.toString val metadataPath = cuidDir(cuid, pveName).resolve("user-packages.txt") if (!Files.exists(Paths.get(python))) { val msg = s"[PVE][ERR] Python executable not found for PVE: $python" - println(msg) + logger.error(msg) return List(msg) } val trimmedPackageName = packageName.trim val normalizedPackageName = trimmedPackageName.split("==")(0).trim.toLowerCase - val systemPackages = - if (Files.exists(getSystemPath(isLocal))) { - Files - .readAllLines(getSystemPath(isLocal)) - .asScala - .map(_.trim) - .filter(line => line.nonEmpty && !line.startsWith("#")) - .map(line => line.split("==")(0).trim.toLowerCase) - .toSet - } else { - Set[String]() - } - - if (systemPackages.contains(normalizedPackageName)) { + if (systemPackageNames.contains(normalizedPackageName)) { return List( s"[PVE][ERR] $trimmedPackageName is a system package and cannot be deleted." ) @@ -494,11 +540,11 @@ object PveManager { val exitCode = command.!( ProcessLogger( out => { - println(s"[pip] $out") + logger.info(s"[pip] $out") output += s"[pip] $out" }, err => { - System.err.println(s"[pip][ERR] $err") + logger.error(s"[pip][ERR] $err") output += s"[pip][ERR] $err" } ) diff --git a/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResource.scala index 4525bd96007..aeac02b1670 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResource.scala @@ -25,7 +25,6 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.typesafe.scalalogging.LazyLogging import io.dropwizard.auth.Auth import org.apache.texera.auth.SessionUser -import org.apache.texera.common.config.KubernetesConfig import org.jooq.exception.DataAccessException import javax.ws.rs._ @@ -55,10 +54,9 @@ class PveResource extends LazyLogging { @Path("/system") @Produces(Array(MediaType.APPLICATION_JSON)) def getSystemPackages: util.Map[String, util.List[String]] = { - val isLocal = !KubernetesConfig.kubernetesComputingUnitEnabled try { val systemPkgs = - PveManager.getSystemPackages(isLocal).toList.asJava + PveManager.getSystemPackages.toList.asJava Map("system" -> systemPkgs).asJava } catch { @@ -208,12 +206,10 @@ class PveResource extends LazyLogging { @PathParam("pveName") pveName: String, @PathParam("packageName") packageName: String ): Response = { - val isLocal = !KubernetesConfig.kubernetesComputingUnitEnabled val messages = PveManager.deletePackages( cuid, packageName, - pveName, - isLocal + pveName ) if (messages.exists(_.contains("[PVE][ERR]"))) { diff --git a/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveWebsocketResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveWebsocketResource.scala index d539b8c31b3..34654c7604c 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveWebsocketResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveWebsocketResource.scala @@ -19,8 +19,6 @@ package org.apache.texera.web.resource.pythonvirtualenvironment -import org.apache.texera.common.config.KubernetesConfig - import javax.websocket._ import javax.websocket.server.ServerEndpoint import java.util.concurrent.LinkedBlockingQueue @@ -43,7 +41,6 @@ class PveWebsocketResource { val cuid = params.get("cuid").get(0).toInt val pveName = params.get("pveName").get(0) - val isLocal = !KubernetesConfig.kubernetesComputingUnitEnabled val action = params.getOrDefault("action", java.util.List.of("create")).get(0) val queue = new LinkedBlockingQueue[String]() @@ -52,7 +49,7 @@ class PveWebsocketResource { try { action match { case "create" => - PveManager.createNewPve(cuid, queue, pveName, isLocal) + PveManager.createNewPve(cuid, queue, pveName) case "install" => val packages = @@ -66,7 +63,7 @@ class PveWebsocketResource { .map(_.replace("\"", "").trim) .filter(_.nonEmpty) - PveManager.installUserPackages(packages, cuid, queue, pveName, isLocal) + PveManager.installUserPackages(packages, cuid, queue, pveName) case _ => queue.put(s"[ERR] Unknown action: $action") diff --git a/amber/src/test/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResourceSpec.scala b/amber/src/test/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResourceSpec.scala index f0de0fa24a6..69c993282d0 100644 --- a/amber/src/test/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResourceSpec.scala +++ b/amber/src/test/scala/org/apache/texera/web/resource/pythonvirtualenvironment/PveResourceSpec.scala @@ -80,7 +80,7 @@ class PveResourceSpec } "PveManager" should "create a new PVE and list it" in { - PveManager.createNewPve(testCuid, queue, testPveName, isLocal = true) + PveManager.createNewPve(testCuid, queue, testPveName) val logs = queueText() @@ -99,7 +99,7 @@ class PveResourceSpec } "PveManager" should "install a user package and list it for the PVE" in { - PveManager.createNewPve(testCuid, queue, testPveName, isLocal = true) + PveManager.createNewPve(testCuid, queue, testPveName) val packageName = "colorama" val packageVersion = "0.4.6" @@ -111,8 +111,7 @@ class PveResourceSpec List(packageSpec), testCuid, queue, - testPveName, - isLocal = true + testPveName ) val logs = queueText() @@ -130,7 +129,7 @@ class PveResourceSpec } "PveManager" should "delete a user package and remove it from the PVE package list" in { - PveManager.createNewPve(testCuid, queue, testPveName, isLocal = true) + PveManager.createNewPve(testCuid, queue, testPveName) val packageName = "colorama" val packageVersion = "0.4.6" @@ -142,8 +141,7 @@ class PveResourceSpec List(packageSpec), testCuid, queue, - testPveName, - isLocal = true + testPveName ) PveManager @@ -155,8 +153,7 @@ class PveResourceSpec val deleteLogs = PveManager.deletePackages( testCuid, packageName, - testPveName, - isLocal = true + testPveName ) deleteLogs.mkString("\n") should not include "[PVE][ERR]" @@ -171,7 +168,7 @@ class PveResourceSpec } "PveManager" should "delete all PVEs for a computing unit" in { - PveManager.createNewPve(testCuid, queue, testPveName, isLocal = true) + PveManager.createNewPve(testCuid, queue, testPveName) Files.exists(testRoot.resolve(testPveName)) shouldBe true @@ -182,7 +179,7 @@ class PveResourceSpec } "PveManager.getPythonBin" should "return Some for an existing venv" in { - PveManager.createNewPve(testCuid, queue, testPveName, isLocal = true) + PveManager.createNewPve(testCuid, queue, testPveName) val result = PveManager.getPythonBin(testCuid, testPveName) result shouldBe defined diff --git a/amber/system-requirements-lock.txt b/amber/system-requirements-lock.txt deleted file mode 100644 index 3447f3e5894..00000000000 --- a/amber/system-requirements-lock.txt +++ /dev/null @@ -1,97 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# This file is manually generated to track system packages used in PVEs. -# NOTE: This file must be updated whenever requirements.txt or -# operator-requirements.txt changes. - -aiohappyeyeballs==2.6.1 -aiohttp==3.13.5 -aioitertools==0.13.0 -aiobotocore==2.25.1 -aiosignal==1.4.0 -annotated-types==0.7.0 -appdirs==1.4.4 -asn1crypto==1.5.1 -attrs==26.1.0 -betterproto==2.0.0b7 -bidict==0.22.0 -boto3==1.40.53 -botocore==1.40.53 -cached_property==1.5.2 -cachetools==6.2.6 -certifi==2026.4.22 -charset_normalizer==3.4.7 -click==8.3.3 -Deprecated==1.2.14 -frozenlist==1.8.0 -fs==2.4.16 -fsspec==2025.9.0 -grpclib==0.4.9 -h2==4.3.0 -hpack==4.1.0 -hyperframe==6.1.0 -idna==3.15 -jmespath==1.1.0 -loguru==0.7.0 -markdown-it-py==4.2.0 -mdurl==0.1.2 -mmh3==5.2.1 -multidict==6.7.1 -numpy==2.1.0 -overrides==7.4.0 -packaging==26.2 -pampy==0.3.0 -pandas==2.2.3 -pg8000==1.31.5 -praw==7.6.1 -prawcore==2.4.0 -propcache==0.5.2 -protobuf==7.34.1 -psutil==5.9.0 -pyarrow==21.0.0 -pydantic==2.13.4 -pydantic-core==2.46.4 -pygments==2.20.0 -pyiceberg==0.11.1 -pympler==1.1 -pyparsing==3.3.2 -pyroaring==1.1.0 -python-dateutil==2.8.2 -pytz==2026.2 -readerwriterlock==1.0.9 -requests==2.34.0 -rich==14.3.4 -ruff==0.14.7 -s3fs==2025.9.0 -s3transfer==0.14.0 -scramp==1.4.8 -setuptools==80.10.2 -six==1.17.0 -SQLAlchemy==2.0.37 -strictyaml==1.7.3 -tenacity==8.5.0 -typing-inspection==0.4.2 -typing_extensions==4.14.1 -tzdata==2026.2 -tzlocal==2.1 -update-checker==0.18.0 -urllib3==2.7.0 -websocket-client==1.9.0 -wrapt==1.17.3 -yarl==1.23.0 -zstandard==0.25.0 \ No newline at end of file diff --git a/bin/computing-unit-master.dockerfile b/bin/computing-unit-master.dockerfile index 12d026ee543..df9928323ce 100644 --- a/bin/computing-unit-master.dockerfile +++ b/bin/computing-unit-master.dockerfile @@ -93,7 +93,6 @@ WORKDIR /texera/amber COPY --from=build /texera/amber/requirements.txt /tmp/requirements.txt COPY --from=build /texera/amber/operator-requirements.txt /tmp/operator-requirements.txt -COPY --from=build /texera/amber/system-requirements-lock.txt /tmp/system-requirements-lock.txt # Install Python runtime dependencies RUN apt-get update && apt-get install -y \