{
  "operators": [
    {
      "operatorID": "FileScan-operator-10702111-704b-4a1c-aa72-8b07894ecd54",
      "operatorType": "FileScan",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "encoding": "UTF_8",
        "extract": false,
        "outputFileName": false,
        "attributeType": "large binary",
        "attributeName": "line",
        "fileName": "/texera/rds/v2/all-20230303.rds"
      },
      "inputPorts": [],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "File Scan",
      "dynamicInputPorts": false,
      "dynamicOutputPorts": false
    },
    {
      "operatorID": "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c",
      "operatorType": "JavaUDF",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "code": "import org.apache.texera.amber.operator.map.MapOpExec;\nimport org.apache.texera.amber.core.tuple.Tuple;\nimport org.apache.texera.amber.core.tuple.TupleLike;\nimport org.apache.texera.amber.core.tuple.LargeBinary;\nimport org.apache.texera.amber.core.tuple.Schema;\nimport org.apache.texera.amber.core.tuple.Attribute;\nimport org.apache.texera.amber.core.tuple.AttributeType;\nimport scala.Function1;\nimport java.io.Serializable;\nimport org.apache.texera.service.util.LargeBinaryInputStream;\nimport org.apache.texera.service.util.LargeBinaryOutputStream;\n\npublic class JavaUDFOpExec extends MapOpExec {\n    // Chunk size for streaming (e.g., 64MB)\n    private static final int CHUNK_SIZE = 64 * 1024 * 1024;\n    \n    // Maximum bytes to read: 3GB\n    private static final long MAX_BYTES = 3L * 1024 * 1024 * 1024;\n    \n    public JavaUDFOpExec () {\n        this.setMapFunc((Function1<Tuple, TupleLike> & Serializable) this::processTuple);\n    }\n    \n    public TupleLike processTuple(Tuple tuple) {\n        LargeBinary LargeBinary = (LargeBinary) tuple.getField(\"line\");\n        \n        try {\n            // Create output LargeBinary first\n            LargeBinary newLargeBinary = new LargeBinary();\n            \n            // Stream at most MAX_BYTES (3GB) from input to output\n            try (LargeBinaryInputStream inStream = new LargeBinaryInputStream(LargeBinary);\n                 LargeBinaryOutputStream outStream = new LargeBinaryOutputStream(newLargeBinary)) {\n                \n                byte[] buffer = new byte[CHUNK_SIZE];\n                long totalBytesWritten = 0;\n                \n                while (totalBytesWritten < MAX_BYTES) {\n                    // Calculate how many bytes to read in this iteration\n                    int bytesToRead = (int) Math.min(CHUNK_SIZE, MAX_BYTES - totalBytesWritten);\n                    \n                    // Read chunk\n                    int bytesRead = inStream.read(buffer, 0, bytesToRead);\n                    \n                    // Check if we've reached end of stream\n                    if (bytesRead == -1) {\n                        System.out.println(\"Reached end of input at \" + totalBytesWritten + \" bytes (less than 3GB)\");\n                        break;\n                    }\n                    \n                    // Process chunk\n                    byte[] processedChunk = transformData(buffer, bytesRead);\n                    \n                    // Write processed chunk to output\n                    outStream.write(processedChunk, 0, bytesRead);\n                    totalBytesWritten += bytesRead;\n                }\n                \n                System.out.println(\"Processed \" + totalBytesWritten + \" bytes (target: 3GB)\");\n            }\n            \n            System.out.println(\"Created new large binary: \" + newLargeBinary.getUri());\n            \n            // Build a new tuple with the new large binary field\n            Schema newSchema = tuple.getSchema().add(new Attribute(\"processed_line\", AttributeType.LARGE_BINARY));\n            Tuple.Builder builder = Tuple.builder(newSchema);\n            builder.add(tuple, false);\n            builder.add(\"processed_line\", AttributeType.LARGE_BINARY, newLargeBinary);\n            \n            return builder.build();\n            \n        } catch (Exception e) {\n            throw new RuntimeException(\"Error processing large binary\", e);\n        }\n    }\n    \n    /**\n     * Transform a chunk of data.\n     * Note: This receives the actual buffer and the number of valid bytes.\n     */\n    private byte[] transformData(byte[] buffer, int validBytes) {\n        // If you need to transform the data, do it here\n        // For now, just return the same buffer (no transformation)\n        return buffer;\n    }\n}",
        "workers": 1,
        "retainInputColumns": true,
        "outputColumns": [
          {
            "attributeName": "processed_line",
            "attributeType": "large_binary"
          }
        ]
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": true,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Java UDF",
      "dynamicInputPorts": true,
      "dynamicOutputPorts": true
    },
    {
      "operatorID": "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca",
      "operatorType": "JavaUDF",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "code": "import org.apache.texera.amber.operator.map.MapOpExec;\nimport org.apache.texera.amber.core.tuple.Tuple;\nimport org.apache.texera.amber.core.tuple.TupleLike;\nimport org.apache.texera.amber.core.tuple.LargeBinary;\nimport org.apache.texera.amber.core.tuple.Schema;\nimport org.apache.texera.amber.core.tuple.Attribute;\nimport org.apache.texera.amber.core.tuple.AttributeType;\nimport scala.Function1;\nimport java.io.Serializable;\nimport org.apache.texera.service.util.LargeBinaryInputStream;\nimport org.apache.texera.service.util.LargeBinaryOutputStream;\n\npublic class JavaUDFOpExec extends MapOpExec {\n    // Chunk size for streaming (e.g., 64MB)\n    private static final int CHUNK_SIZE = 64 * 1024 * 1024;\n    \n    // Maximum bytes to read: 3MB\n    private static final long MAX_BYTES = 3L * 1024 * 1024;\n    \n    public JavaUDFOpExec () {\n        this.setMapFunc((Function1<Tuple, TupleLike> & Serializable) this::processTuple);\n    }\n    \n    public TupleLike processTuple(Tuple tuple) {\n        LargeBinary LargeBinary = (LargeBinary) tuple.getField(\"processed_line\");\n        \n        try {\n            // Create output LargeBinary first\n            LargeBinary newLargeBinary = new LargeBinary();\n            \n            // Stream at most MAX_BYTES (3MB) from input to output\n            try (LargeBinaryInputStream inStream = new LargeBinaryInputStream(LargeBinary);\n                 LargeBinaryOutputStream outStream = new LargeBinaryOutputStream(newLargeBinary)) {\n                \n                byte[] buffer = new byte[CHUNK_SIZE];\n                long totalBytesWritten = 0;\n                \n                while (totalBytesWritten < MAX_BYTES) {\n                    // Calculate how many bytes to read in this iteration\n                    int bytesToRead = (int) Math.min(CHUNK_SIZE, MAX_BYTES - totalBytesWritten);\n                    \n                    // Read chunk\n                    int bytesRead = inStream.read(buffer, 0, bytesToRead);\n                    \n                    // Check if we've reached end of stream\n                    if (bytesRead == -1) {\n                        System.out.println(\"Reached end of input at \" + totalBytesWritten + \" bytes (less than 3MB)\");\n                        break;\n                    }\n                    \n                    // Process chunk\n                    byte[] processedChunk = transformData(buffer, bytesRead);\n                    \n                    // Write processed chunk to output\n                    outStream.write(processedChunk, 0, bytesRead);\n                    totalBytesWritten += bytesRead;\n                }\n                \n                System.out.println(\"Processed \" + totalBytesWritten + \" bytes (target: 3MB)\");\n            }\n            \n            System.out.println(\"Created new large binary: \" + newLargeBinary.getUri());\n            \n            // Build a new tuple with the new large binary field\n            Schema newSchema = tuple.getSchema().add(new Attribute(\"processed_line2\", AttributeType.LARGE_BINARY));\n            Tuple.Builder builder = Tuple.builder(newSchema);\n            builder.add(tuple, false);\n            builder.add(\"processed_line2\", AttributeType.LARGE_BINARY, newLargeBinary);\n            \n            return builder.build();\n            \n        } catch (Exception e) {\n            throw new RuntimeException(\"Error processing large binary\", e);\n        }\n    }\n    \n    /**\n     * Transform a chunk of data.\n     * Note: This receives the actual buffer and the number of valid bytes.\n     */\n    private byte[] transformData(byte[] buffer, int validBytes) {\n        // If you need to transform the data, do it here\n        // For now, just return the same buffer (no transformation)\n        return buffer;\n    }\n}",
        "workers": 1,
        "retainInputColumns": true,
        "outputColumns": [
          {
            "attributeName": "processed_line2",
            "attributeType": "large_binary"
          }
        ]
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": true,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Java UDF",
      "dynamicInputPorts": true,
      "dynamicOutputPorts": true
    },
    {
      "operatorID": "Limit-operator-99fbea4a-acad-4b03-a5c3-51a5c5d57dc5",
      "operatorType": "Limit",
      "operatorVersion": "N/A",
      "operatorProperties": {
        "limit": 1
      },
      "inputPorts": [
        {
          "portID": "input-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false,
          "dependencies": []
        }
      ],
      "outputPorts": [
        {
          "portID": "output-0",
          "displayName": "",
          "allowMultiInputs": false,
          "isDynamicPort": false
        }
      ],
      "showAdvanced": false,
      "isDisabled": false,
      "customDisplayName": "Limit",
      "dynamicInputPorts": false,
      "dynamicOutputPorts": false
    }
  ],
  "operatorPositions": {
    "FileScan-operator-10702111-704b-4a1c-aa72-8b07894ecd54": {
      "x": 0,
      "y": -1
    },
    "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c": {
      "x": 140,
      "y": -1
    },
    "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca": {
      "x": 280,
      "y": 0
    },
    "Limit-operator-99fbea4a-acad-4b03-a5c3-51a5c5d57dc5": {
      "x": 420,
      "y": 0
    }
  },
  "links": [
    {
      "linkID": "link-0e200954-dad3-41f7-ba8f-7cacbe499935",
      "source": {
        "operatorID": "FileScan-operator-10702111-704b-4a1c-aa72-8b07894ecd54",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c",
        "portID": "input-0"
      }
    },
    {
      "linkID": "56b82b0d-f243-4eff-85fa-ff4cb5520a55",
      "source": {
        "operatorID": "JavaUDF-operator-50a37860-6eb1-47ca-b3eb-c42d8f85163c",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca",
        "portID": "input-0"
      }
    },
    {
      "linkID": "7f89a6cb-7dc5-422a-9589-8f001aef8522",
      "source": {
        "operatorID": "JavaUDF-operator-d9ffcd76-7e3a-42e2-92a4-d16b08c27bca",
        "portID": "output-0"
      },
      "target": {
        "operatorID": "Limit-operator-99fbea4a-acad-4b03-a5c3-51a5c5d57dc5",
        "portID": "input-0"
      }
    }
  ],
  "commentBoxes": [],
  "settings": {
    "dataTransferBatchSize": 400
  }
}