{
  "operators": [
    {
      "operatorID": "FileScan-operator-10702111-704b-4a1c-aa72-8b07894ecd54",
      "operatorType": "FileScan",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "encoding": "UTF_8",
        "extract": false,
        "outputFileName": false,
        "attributeType": "large binary",
        "attributeName": "line",
        "fileName": "/texera/rds/v2/all-20230303.rds"
      },
      "inputPorts": [],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": " File Scan",
      "dynamicInputPorts": false,
      "dynamicOutputPorts": false
    },
    {
      "operatorID": "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c",
      "operatorType": "JavaUDF",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "code": "import org.apache.texera.amber.operator.map.MapOpExec;\nimport org.apache.texera.amber.core.tuple.Tuple;\nimport org.apache.texera.amber.core.tuple.TupleLike;\nimport org.apache.texera.amber.core.tuple.LargeBinary;\nimport org.apache.texera.amber.core.tuple.Schema;\nimport org.apache.texera.amber.core.tuple.Attribute;\nimport org.apache.texera.amber.core.tuple.AttributeType;\nimport scala.Function1;\nimport java.io.Serializable;\nimport org.apache.texera.service.util.LargeBinaryInputStream;\nimport org.apache.texera.service.util.LargeBinaryOutputStream;\n\npublic class JavaUDFOpExec extends MapOpExec {\n    // Chunk size for streaming (e.g., 64MB)\n    private static final int CHUNK_SIZE = 64 * 1024 * 1024;\n    \n    // Maximum bytes to read: 3GB\n    private static final long MAX_BYTES = 3L * 1024 * 1024 * 1024;\n    \n    public JavaUDFOpExec () {\n        this.setMapFunc((Function1<Tuple, TupleLike> & Serializable) this::processTuple);\n    }\n    \n    public TupleLike processTuple(Tuple tuple) {\n        LargeBinary LargeBinary = (LargeBinary) tuple.getField(\"line\");\n        \n        try {\n            // Create output LargeBinary first\n            LargeBinary newLargeBinary = new LargeBinary();\n            \n            // Stream exactly 1GB from input to output\n            try (LargeBinaryInputStream inStream = new LargeBinaryInputStream(LargeBinary);\n                 LargeBinaryOutputStream outStream = new LargeBinaryOutputStream(newLargeBinary)) {\n                \n                byte[] buffer = new byte[CHUNK_SIZE];\n                long totalBytesWritten = 0;\n                \n                while (totalBytesWritten < MAX_BYTES) {\n                    // Calculate how many bytes to read in this iteration\n                    int bytesToRead = (int) Math.min(CHUNK_SIZE, MAX_BYTES - totalBytesWritten);\n                    \n                    // Read chunk\n                    int bytesRead = inStream.read(buffer, 0, bytesToRead);\n                    \n                    // Check if we've reached end of stream\n                    if (bytesRead == -1) {\n                        System.out.println(\"Reached end of input at \" + totalBytesWritten + \" bytes (less than 1GB)\");\n                        break;\n                    }\n                    \n                    // Process chunk\n                    byte[] processedChunk = transformData(buffer, bytesRead);\n                    \n                    // Write processed chunk to output\n                    outStream.write(processedChunk, 0, bytesRead);\n                    totalBytesWritten += bytesRead;\n                }\n                \n                System.out.println(\"Processed \" + totalBytesWritten + \" bytes (target: 1GB)\");\n            }\n            \n            System.out.println(\"Created new large binary: \" + newLargeBinary.getUri());\n            \n            // Build a new tuple with the new large binary field\n            Schema newSchema = tuple.getSchema().add(new Attribute(\"processed_line\", AttributeType.LARGE_BINARY));\n            Tuple.Builder builder = Tuple.builder(newSchema);\n            builder.add(tuple, false);\n            builder.add(\"processed_line\", AttributeType.LARGE_BINARY, newLargeBinary);\n            \n            return builder.build();\n            \n        } catch (Exception e) {\n            throw new RuntimeException(\"Error processing large binary\", e);\n        }\n    }\n    \n    /**\n     * Transform a chunk of data.\n     * Note: This receives the actual buffer and the number of valid bytes.\n     */\n    private byte[] transformData(byte[] buffer, int validBytes) {\n        // If you need to transform the data, do it here\n        // For now, just return the same buffer (no transformation)\n        return buffer;\n    }\n}",
        "workers": 1,
        "retainInputColumns": true,
        "outputColumns": [
          {
            "attributeName": "processed_line",
            "attributeType": "large_binary"
          }
        ]
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": true,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Java UDF",
      "dynamicInputPorts": true,
      "dynamicOutputPorts": true
    },
    {
      "operatorID": "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca",
      "operatorType": "JavaUDF",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "code": "import org.apache.texera.amber.operator.map.MapOpExec;\nimport org.apache.texera.amber.core.tuple.Tuple;\nimport org.apache.texera.amber.core.tuple.TupleLike;\nimport org.apache.texera.amber.core.tuple.LargeBinary;\nimport org.apache.texera.amber.core.tuple.Schema;\nimport org.apache.texera.amber.core.tuple.Attribute;\nimport org.apache.texera.amber.core.tuple.AttributeType;\nimport scala.Function1;\nimport java.io.Serializable;\nimport org.apache.texera.service.util.LargeBinaryInputStream;\nimport org.apache.texera.service.util.LargeBinaryOutputStream;\n\npublic class JavaUDFOpExec extends MapOpExec {\n    // Chunk size for streaming (e.g., 64MB)\n    private static final int CHUNK_SIZE = 64 * 1024 * 1024;\n    \n    // Maximum bytes to read: 3MB\n    private static final long MAX_BYTES = 3L * 1024 * 1024;\n    \n    public JavaUDFOpExec () {\n        this.setMapFunc((Function1<Tuple, TupleLike> & Serializable) this::processTuple);\n    }\n    \n    public TupleLike processTuple(Tuple tuple) {\n        LargeBinary LargeBinary = (LargeBinary) tuple.getField(\"processed_line\");\n        \n        try {\n            // Create output LargeBinary first\n            LargeBinary newLargeBinary = new LargeBinary();\n            \n            // Stream exactly 1GB from input to output\n            try (LargeBinaryInputStream inStream = new LargeBinaryInputStream(LargeBinary);\n                 LargeBinaryOutputStream outStream = new LargeBinaryOutputStream(newLargeBinary)) {\n                \n                byte[] buffer = new byte[CHUNK_SIZE];\n                long totalBytesWritten = 0;\n                \n                while (totalBytesWritten < MAX_BYTES) {\n                    // Calculate how many bytes to read in this iteration\n                    int bytesToRead = (int) Math.min(CHUNK_SIZE, MAX_BYTES - totalBytesWritten);\n                    \n                    // Read chunk\n                    int bytesRead = inStream.read(buffer, 0, bytesToRead);\n                    \n                    // Check if we've reached end of stream\n                    if (bytesRead == -1) {\n                        System.out.println(\"Reached end of input at \" + totalBytesWritten + \" bytes (less than 1GB)\");\n                        break;\n                    }\n                    \n                    // Process chunk\n                    byte[] processedChunk = transformData(buffer, bytesRead);\n                    \n                    // Write processed chunk to output\n                    outStream.write(processedChunk, 0, bytesRead);\n                    totalBytesWritten += bytesRead;\n                }\n                \n                System.out.println(\"Processed \" + totalBytesWritten + \" bytes (target: 1GB)\");\n            }\n            \n            System.out.println(\"Created new large binary: \" + newLargeBinary.getUri());\n            \n            // Build a new tuple with the new large binary field\n            Schema newSchema = tuple.getSchema().add(new Attribute(\"processed_line2\", AttributeType.LARGE_BINARY));\n            Tuple.Builder builder = Tuple.builder(newSchema);\n            builder.add(tuple, false);\n            builder.add(\"processed_line2\", AttributeType.LARGE_BINARY, newLargeBinary);\n            \n            return builder.build();\n            \n        } catch (Exception e) {\n            throw new RuntimeException(\"Error processing large binary\", e);\n        }\n    }\n    \n    /**\n     * Transform a chunk of data.\n     * Note: This receives the actual buffer and the number of valid bytes.\n     */\n    private byte[] transformData(byte[] buffer, int validBytes) {\n        // If you need to transform the data, do it here\n        // For now, just return the same buffer (no transformation)\n        return buffer;\n    }\n}",
        "workers": 1,
        "retainInputColumns": true,
        "outputColumns": [
          {
            "attributeName": "processed_line2",
            "attributeType": "large_binary"
          }
        ]
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": true,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Java UDF",
      "dynamicInputPorts": true,
      "dynamicOutputPorts": true
    },
    {
      "operatorID": "Limit-operator-99fbea4a-acad-4b03-a5c3-51a5c5d57dc5",
      "operatorType": "Limit",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "limit": 1
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Limit",
      "dynamicInputPorts": false,
      "dynamicOutputPorts": false
    },
    {
      "operatorID": "PythonUDFV2-operator-e34887ba-b13e-4b44-ad83-0d6df571732c",
      "operatorType": "PythonUDFV2",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "code": "from pytexera import *\n\nclass ProcessTupleOperator(UDFOperatorV2):\n    \n    @overrides\n    def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]:\n        # Get the upstream large binary from the \"line\" field\n        input_large_binary = tuple_[\"line\"]\n        \n        # Create a new large binary for output\n        output_large_binary = largebinary()\n        \n        # Read 1GB from input and write to output\n        GB_SIZE = 1024 * 1024 * 1024  # 1GB in bytes\n        \n        with LargeBinaryInputStream(input_large_binary) as input_stream:\n            with LargeBinaryOutputStream(output_large_binary) as output_stream:\n                bytes_read = 0\n                chunk_size = 64 * 1024  # Read in 64KB chunks\n                \n                while bytes_read < GB_SIZE:\n                    # Calculate how much to read in this chunk\n                    remaining = GB_SIZE - bytes_read\n                    read_size = min(chunk_size, remaining)\n                    \n                    chunk = input_stream.read(read_size)\n                    if not chunk:  # EOF reached\n                        break\n                    \n                    output_stream.write(chunk)\n                    bytes_read += len(chunk)\n        \n        # Create a new tuple with the new large binary\n        new_tuple = Tuple(tuple_)\n        new_tuple[\"line\"] = output_large_binary\n        \n        yield new_tuple",
        "workers": 1,
        "retainInputColumns": true
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": true,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Python UDF",
      "dynamicInputPorts": true,
      "dynamicOutputPorts": true
    },
    {
      "operatorID": "PythonUDFV2-operator-4361f4cf-e2aa-4f3d-b1d1-42ed18e1c05d",
      "operatorType": "PythonUDFV2",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "code": "from pytexera import *\n\nclass ProcessTupleOperator(UDFOperatorV2):\n    \n    @overrides\n    def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]:\n        # Get the upstream large binary from the \"line\" field\n        input_large_binary = tuple_[\"line\"]\n        \n        # Create a new large binary for output\n        output_large_binary = largebinary()\n        \n        # Read 1GB from input and write to output\n        GB_SIZE = 1024 * 1024 * 1024  # 1GB in bytes\n        \n        with LargeBinaryInputStream(input_large_binary) as input_stream:\n            with LargeBinaryOutputStream(output_large_binary) as output_stream:\n                bytes_read = 0\n                chunk_size = 64 * 1024  # Read in 64KB chunks\n                \n                while bytes_read < GB_SIZE:\n                    # Calculate how much to read in this chunk\n                    remaining = GB_SIZE - bytes_read\n                    read_size = min(chunk_size, remaining)\n                    \n                    chunk = input_stream.read(read_size)\n                    if not chunk:  # EOF reached\n                        break\n                    \n                    output_stream.write(chunk)\n                    bytes_read += len(chunk)\n        \n        # Create a new tuple with the new large binary\n        new_tuple = Tuple(tuple_)\n        new_tuple[\"line\"] = output_large_binary\n        \n        yield new_tuple",
        "workers": 1,
        "retainInputColumns": true
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": true,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Python UDF",
      "dynamicInputPorts": true,
      "dynamicOutputPorts": true
    },
    {
      "operatorID": "JavaUDF-operator-9c301d4c-ff2c-4d2f-ae71-0e48323d6282",
      "operatorType": "JavaUDF",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "code": "import org.apache.texera.amber.operator.map.MapOpExec;\nimport org.apache.texera.amber.core.tuple.Tuple;\nimport org.apache.texera.amber.core.tuple.TupleLike;\nimport org.apache.texera.amber.core.tuple.LargeBinary;\nimport org.apache.texera.amber.core.tuple.Schema;\nimport org.apache.texera.amber.core.tuple.Attribute;\nimport org.apache.texera.amber.core.tuple.AttributeType;\nimport scala.Function1;\nimport java.io.Serializable;\nimport org.apache.texera.service.util.LargeBinaryInputStream;\nimport org.apache.texera.service.util.LargeBinaryOutputStream;\n\npublic class JavaUDFOpExec extends MapOpExec {\n    // Chunk size for streaming (e.g., 64MB)\n    private static final int CHUNK_SIZE = 64 * 1024 * 1024;\n    \n    // Maximum bytes to read: 3GB\n    private static final long MAX_BYTES = 3L * 1024 * 1024 * 1024;\n    \n    public JavaUDFOpExec () {\n        this.setMapFunc((Function1<Tuple, TupleLike> & Serializable) this::processTuple);\n    }\n    \n    public TupleLike processTuple(Tuple tuple) {\n        LargeBinary LargeBinary = (LargeBinary) tuple.getField(\"line\");\n        \n        try {\n            // Create output LargeBinary first\n            LargeBinary newLargeBinary = new LargeBinary();\n            \n            // Stream exactly 1GB from input to output\n            try (LargeBinaryInputStream inStream = new LargeBinaryInputStream(LargeBinary);\n                 LargeBinaryOutputStream outStream = new LargeBinaryOutputStream(newLargeBinary)) {\n                \n                byte[] buffer = new byte[CHUNK_SIZE];\n                long totalBytesWritten = 0;\n                \n                while (totalBytesWritten < MAX_BYTES) {\n                    // Calculate how many bytes to read in this iteration\n                    int bytesToRead = (int) Math.min(CHUNK_SIZE, MAX_BYTES - totalBytesWritten);\n                    \n                    // Read chunk\n                    int bytesRead = inStream.read(buffer, 0, bytesToRead);\n                    \n                    // Check if we've reached end of stream\n                    if (bytesRead == -1) {\n                        System.out.println(\"Reached end of input at \" + totalBytesWritten + \" bytes (less than 1GB)\");\n                        break;\n                    }\n                    \n                    // Process chunk\n                    byte[] processedChunk = transformData(buffer, bytesRead);\n                    \n                    // Write processed chunk to output\n                    outStream.write(processedChunk, 0, bytesRead);\n                    totalBytesWritten += bytesRead;\n                }\n                \n                System.out.println(\"Processed \" + totalBytesWritten + \" bytes (target: 1GB)\");\n            }\n            \n            System.out.println(\"Created new large binary: \" + newLargeBinary.getUri());\n            \n            // Build a new tuple with the new large binary field\n            Schema newSchema = tuple.getSchema().add(new Attribute(\"processed_line\", AttributeType.LARGE_BINARY));\n            Tuple.Builder builder = Tuple.builder(newSchema);\n            builder.add(tuple, false);\n            builder.add(\"processed_line\", AttributeType.LARGE_BINARY, newLargeBinary);\n            \n            return builder.build();\n            \n        } catch (Exception e) {\n            throw new RuntimeException(\"Error processing large binary\", e);\n        }\n    }\n    \n    /**\n     * Transform a chunk of data.\n     * Note: This receives the actual buffer and the number of valid bytes.\n     */\n    private byte[] transformData(byte[] buffer, int validBytes) {\n        // If you need to transform the data, do it here\n        // For now, just return the same buffer (no transformation)\n        return buffer;\n    }\n}",
        "workers": 1,
        "retainInputColumns": true,
        "outputColumns": [
          {
            "attributeName": "processed_line",
            "attributeType": "large_binary"
          }
        ]
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": true,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Java UDF",
      "dynamicInputPorts": true,
      "dynamicOutputPorts": true
    },
    {
      "operatorID": "Limit-operator-bcf1e00d-b86b-4dca-a628-df78a55fe6fa",
      "operatorType": "Limit",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "limit": 1
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Limit",
      "dynamicInputPorts": false,
      "dynamicOutputPorts": false
    }
  ],
  "operatorPositions": {
    "FileScan-operator-10702111-704b-4a1c-aa72-8b07894ecd54": {
      "x": 0,
      "y": -1
    },
    "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c": {
      "x": 140,
      "y": -1
    },
    "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca": {
      "x": 280,
      "y": 0
    },
    "Limit-operator-99fbea4a-acad-4b03-a5c3-51a5c5d57dc5": {
      "x": 420,
      "y": 0
    },
    "PythonUDFV2-operator-e34887ba-b13e-4b44-ad83-0d6df571732c": {
      "x": 159,
      "y": 160
    },
    "PythonUDFV2-operator-4361f4cf-e2aa-4f3d-b1d1-42ed18e1c05d": {
      "x": 302,
      "y": 162
    },
    "JavaUDF-operator-9c301d4c-ff2c-4d2f-ae71-0e48323d6282": {
      "x": 429,
      "y": 165
    },
    "Limit-operator-bcf1e00d-b86b-4dca-a628-df78a55fe6fa": {
      "x": 575,
      "y": 165
    }
  },
  "links": [
    {
      "linkID": "link-0e200954-dad3-41f7-ba8f-7cacbe499935",
      "source": {
        "operatorID": "FileScan-operator-10702111-704b-4a1c-aa72-8b07894ecd54",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c",
        "portID": "input-0"
      }
    },
    {
      "linkID": "56b82b0d-f243-4eff-85fa-ff4cb5520a55",
      "source": {
        "operatorID": "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca",
        "portID": "input-0"
      }
    },
    {
      "linkID": "7f89a6cb-7dc5-422a-9589-8f001aef8522",
      "source": {
        "operatorID": "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "Limit-operator-99fbea4a-acad-4b03-a5c3-51a5c5d57dc5",
        "portID": "input-0"
      }
    },
    {
      "linkID": "link-5de6e443-6054-4d60-8117-f25e5964e392",
      "source": {
        "operatorID": "PythonUDFV2-operator-e34887ba-b13e-4b44-ad83-0d6df571732c",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "PythonUDFV2-operator-4361f4cf-e2aa-4f3d-b1d1-42ed18e1c05d",
        "portID": "input-0"
      }
    },
    {
      "linkID": "3f59035d-2d1f-472e-943d-ee80eeec7bd0",
      "source": {
        "operatorID": "FileScan-operator-10702111-704b-4a1c-aa72-8b07894ecd54",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "PythonUDFV2-operator-e34887ba-b13e-4b44-ad83-0d6df571732c",
        "portID": "input-0"
      }
    },
    {
      "linkID": "18cbe69c-ad2f-4cc7-b613-92e3a355dba5",
      "source": {
        "operatorID": "PythonUDFV2-operator-4361f4cf-e2aa-4f3d-b1d1-42ed18e1c05d",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "JavaUDF-operator-9c301d4c-ff2c-4d2f-ae71-0e48323d6282",
        "portID": "input-0"
      }
    },
    {
      "linkID": "01f75b92-be58-4c75-8f2b-06bb529e228d",
      "source": {
        "operatorID": "JavaUDF-operator-9c301d4c-ff2c-4d2f-ae71-0e48323d6282",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "Limit-operator-bcf1e00d-b86b-4dca-a628-df78a55fe6fa",
        "portID": "input-0"
      }
    }
  ],
  "commentBoxes": [],
  "settings": {
    "dataTransferBatchSize": 400
  }
}