diff --git a/.gitignore b/.gitignore index c242a3d..d93a104 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,7 @@ datasets __marimo__ *.svg .mypy_cache -*.prof **.pyc __pycache__ -*.bin *.html output diff --git a/ANALYSIS.md b/ANALYSIS.md new file mode 100644 index 0000000..93bab7e --- /dev/null +++ b/ANALYSIS.md @@ -0,0 +1,231 @@ +# Profiling analysis + +We roughly want to have the following: + +- loop start: + - Physical particle positions are looked up in the grid to calculate the relevant index positions + - Once known, can be "isel"ed using Xarray: + - When this is done in Xarray, it goes to the respective underlying Zarr/NetCDF chunks and retrieves these values (using Dask) + - The chunks that were read are stored in memory using an LRU cache, skipping potentially expensive trips to disk in future + - isel results are combined to perform the interpolation + - particle positions are updated + +Here we construct 5 test cases to explore how isel behaviour works in Xarray with point cloud data where particles are seeded sparsely (i.e., not full coverage of the Zarr dataset). I think there is a lot more to explore here - this is just a starting point. + +Here we only measure execution time using cProfile. We haven't run memory profiles yet, though that can be easily done. Also note that cProfile (N.B. as far as I know) isn't able to fully introspect Dask workings, so the resulting profile is somewhat limited. + +See [`xarray-profile.py`](./xarray-profile.py) for the exact code to reproduce these results. + +## Data and parameters + +- Array dataset: (small) Zarr dataset (~1.31MB per chunk) + +``` +>>> _z_store["V_A_grid"].info_complete() +Type : Array +Zarr format : 3 +Data type : Float64(endianness='little') +Fill value : nan +Shape : (30, 50, 200, 200) +Chunk shape : (15, 8, 40, 40) +Order : C +Read-only : True +Store type : LocalStore +Filters : () +Serializer : BytesCodec(endian=<Endian.little: 'little'>) +Compressors : (ZstdCodec(level=0, checksum=False),) +No. 
bytes : 480000000 (457.8M) +No. bytes stored : 455087483 (434.0M) +Storage ratio : 1.1 +``` + +- Params + - no. particles = 10^5 + - Chunk coverage = 0.03 (i.e., 3% chunk coverage - particles reside in 3% of the chunks of the dataset) + +## Cases + +| Case number | Description | Execution time (s) | +| :---------- | :----------------------------------------------------- | :----------------- | +| 1. | interp on already loaded data. Only profile interp | 0.003 | +| 2. | interp on already loaded data. Profile load and interp | 0.730 | +| 3. | interp using dask (i.e., no pre-fetching) | 2.413 | +| 4. | triple interp using dask (i.e., no pre-fetching) | 7.260 | +| 5. | triple interp using dask with LRU cache Zarr Store | 7.735 | + +See [Appendix A](#appendix-a-full-profiling-results-from-cases) for the full results. + +Evidently working with numpy data (1) is the fastest. Comparing (2) and (3), you can see +that loading _the full data_ and then fetching the values is significantly faster than +using Dask to fetch these values. Looking at (3) and (4), we can see that the interpolation +with Dask scales linearly (suggesting no caching of values or of the computation graph), and +comparing (4) and (5) shows that the experimental Zarr CacheStore gives no performance improvement (caveat: `summary.json` records this cache with `max_size=2097152`, i.e. about 2 MiB, which appears to hold only a single chunk, so a larger cache may behave differently). + +All of this indicates that the major slowdown is Dask itself - which is also evident when opening the profile outputs in `snakeviz`. +Looking at the VizTracer output, it's clear that a lot of time is spent waiting for the thread lock (since we're working with a +Dask threadpool executor). Switching to a single-threaded executor via the config actually makes our code in (3), (4), (5) run _faster_, +bringing the execution time for (3) a bit below that of (2). + +# Appendix A: Full profiling results from cases + +The full results are saved in [./saved_outputs](./saved_outputs) (note that the file names have been changed to match the cases above). 
+ +You can use `snakeviz saved_outputs/compare-for-xarray-folks/case1.prof` (and likewise for the other cases) to explore the outputs interactively, or use the following script to print the summaries that are pasted below: + +```python +import pstats +from pstats import SortKey + +for i in [1,2,3,4,5]: + path = f'saved_outputs/compare-for-xarray-folks/case{i}.prof' + p = pstats.Stats(path) + print(f"Summary for {path}") + print("===================") + + p.sort_stats(SortKey.CUMULATIVE).print_stats(10) +``` + +## Summaries + +### Case1 + +``` +Summary for saved_outputs/compare-for-xarray-folks/case1.prof +=================== +Tue Jun 9 13:45:49 2026 saved_outputs/compare-for-xarray-folks/case1.prof + + 3967 function calls (3879 primitive calls) in 0.003 seconds + + Ordered by: cumulative time + List reduced from 226 to 10 due to restriction <10> + + ncalls tottime percall cumtime percall filename:lineno(function) + 1 0.000 0.000 0.003 0.003 /Users/Hodgs004/coding/repos/xarray-interpolation/xarray-profile.py:187(run) + 1 0.000 0.000 0.002 0.002 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:2748(isel) + 1 0.000 0.000 0.002 0.002 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:2903(_isel_fancy) + 9 0.000 0.000 0.002 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/variable.py:1108(isel) + 9 0.000 0.000 0.002 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/variable.py:815(__getitem__) + 9 0.000 0.000 0.002 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/indexing.py:1179(apply_indexer) + 9 0.000 0.000 0.002 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/indexing.py:466(__getitem__) + 2 0.000 0.000 0.001 0.000 
/Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/indexing.py:1703(_vindex_get) + 2 0.001 0.000 0.001 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/nputils.py:168(__getitem__) + 4 0.000 0.000 0.001 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/indexing.py:2027(_oindex_get) + + + + +``` + +### Case2 + +``` +Summary for saved_outputs/compare-for-xarray-folks/case2.prof +=================== +Tue Jun 9 13:45:50 2026 saved_outputs/compare-for-xarray-folks/case2.prof + + 1656074 function calls (1587140 primitive calls) in 0.730 seconds + + Ordered by: cumulative time + List reduced from 847 to 10 due to restriction <10> + + ncalls tottime percall cumtime percall filename:lineno(function) + 723/722 0.001 0.000 4.816 0.007 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/local.py:140(queue_get) + 2080 0.019 0.000 3.177 0.002 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/asyncio/base_events.py:1977(_run_once) + 723/722 0.002 0.000 2.870 0.004 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/queue.py:177(get) + 2080 0.004 0.000 2.457 0.001 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/selectors.py:540(select) + 1241/3 0.003 0.000 1.254 0.418 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/threading.py:337(wait) + 15919 0.006 0.000 0.768 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/asyncio/events.py:92(_run) + 2/1 0.001 0.001 0.692 0.692 /Users/Hodgs004/coding/repos/xarray-interpolation/xarray-profile.py:194(run) + 2/1 0.000 0.000 0.689 0.689 
/Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:531(load) + 2/1 0.000 0.000 0.689 0.689 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/namedarray/daskmanager.py:80(compute) + 2/1 0.000 0.000 0.689 0.689 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/base.py:601(compute) + + + +``` + +### Case3 + +``` +Summary for saved_outputs/compare-for-xarray-folks/case3.prof +=================== +Tue Jun 9 13:45:52 2026 saved_outputs/compare-for-xarray-folks/case3.prof + + 8733671 function calls (8555789 primitive calls) in 2.413 seconds + + Ordered by: cumulative time + List reduced from 1013 to 10 due to restriction <10> + + ncalls tottime percall cumtime percall filename:lineno(function) +19350/19349 0.005 0.000 1.232 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/local.py:140(queue_get) + 2/1 0.005 0.003 0.982 0.982 /Users/Hodgs004/coding/repos/xarray-interpolation/xarray-profile.py:187(run) + 2/1 0.000 0.000 0.976 0.976 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:772(compute) + 2/1 0.000 0.000 0.976 0.976 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:531(load) + 2/1 0.002 0.001 0.976 0.976 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/namedarray/daskmanager.py:80(compute) + 2/1 0.000 0.000 0.975 0.975 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/base.py:601(compute) + 2/1 0.004 0.002 0.974 0.974 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/threaded.py:62(get) + 2/1 0.030 0.015 0.970 0.970 
/Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/local.py:382(get_async) +19350/19349 0.025 0.000 0.865 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/queue.py:177(get) + 65 0.000 0.000 0.799 0.012 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/indexing.py:1179(apply_indexer) + + + +``` + +### Case4 + +``` +Summary for saved_outputs/compare-for-xarray-folks/case4.prof +=================== +Tue Jun 9 13:45:59 2026 saved_outputs/compare-for-xarray-folks/case4.prof + + 26153577 function calls (25620421 primitive calls) in 7.260 seconds + + Ordered by: cumulative time + List reduced from 1007 to 10 due to restriction <10> + + ncalls tottime percall cumtime percall filename:lineno(function) + 2/1 0.011 0.005 5.829 5.829 /Users/Hodgs004/coding/repos/xarray-interpolation/xarray-profile.py:202(run) + 4/3 0.000 0.000 4.235 1.412 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:772(compute) + 4/3 0.000 0.000 4.234 1.411 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:531(load) + 4/3 0.002 0.000 4.234 1.411 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/namedarray/daskmanager.py:80(compute) + 4/3 0.000 0.000 4.232 1.411 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/base.py:601(compute) + 4/3 0.014 0.004 4.191 1.397 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/threaded.py:62(get) + 4/3 0.086 0.021 4.176 1.392 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/local.py:382(get_async) +58048/58047 0.015 0.000 3.532 0.000 
/Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/local.py:140(queue_get) +58048/58047 0.076 0.000 3.032 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/queue.py:177(get) + 195 0.000 0.000 2.368 0.012 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/indexing.py:1179(apply_indexer) + + + +``` + +### Case5 + +``` +Summary for saved_outputs/compare-for-xarray-folks/case5.prof +=================== +Tue Jun 9 13:46:07 2026 saved_outputs/compare-for-xarray-folks/case5.prof + + 26113928 function calls (25581215 primitive calls) in 7.735 seconds + + Ordered by: cumulative time + List reduced from 1027 to 10 due to restriction <10> + + ncalls tottime percall cumtime percall filename:lineno(function) + 2/1 0.013 0.006 6.275 6.275 /Users/Hodgs004/coding/repos/xarray-interpolation/xarray-profile.py:202(run) + 4/3 0.000 0.000 4.285 1.428 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:772(compute) + 4/3 0.000 0.000 4.285 1.428 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/core/dataset.py:531(load) + 4/3 0.003 0.001 4.284 1.428 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/xarray/namedarray/daskmanager.py:80(compute) + 4/3 0.000 0.000 4.281 1.427 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/base.py:601(compute) +58048/58047 0.015 0.000 4.254 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/local.py:140(queue_get) + 4/3 0.010 0.002 4.238 1.413 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/threaded.py:62(get) + 4/3 0.081 0.020 4.228 1.409 
/Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/site-packages/dask/local.py:382(get_async) +58048/58047 0.076 0.000 2.857 0.000 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/queue.py:177(get) + 324 0.006 0.000 2.812 0.009 /Users/Hodgs004/coding/repos/xarray-interpolation/.pixi/envs/default/lib/python3.14/asyncio/base_events.py:1977(_run_once) + + + +``` + +### Comparison diff --git a/README.md b/README.md index 5212154..d3ece57 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ In our simulations, we would like to run with hundreds of thousands of particles - Some assumptions can be made about particle positions. For example, between timesteps a particle occupies the same or a neighbouring grid cell. This assumption allows for grid searching performance improvements, and potentially a lot of cache hits if the flow data of a previous timestep is already loaded into memory. - This computation is parallelisable on a particle level (assuming no particle-particle interation), however is not parallelisable in time. The location of the particle, and the field locations it samples, is directly dependent on the previous computations. -All of this to illusrtate that the data access patterns within Lagrangian oceanography is fundamentally different to that of Eulerian oceanography, and different to a lot of the image-data processing techniques that are explored in the Xarray and Pangeo communities. +All of this to illustrate that the data access patterns within Lagrangian oceanography is fundamentally different to that of Eulerian oceanography, and different to a lot of the image-data processing techniques that are explored in the Xarray and Pangeo communities. 
By exploring the performance of Xarray when it comes to the interpolation of point-cloud data within data-cubes, we can hopefully measure how feasible it is to use Xarray in it's current state for integration in Lagrangian simulation frameworks. In the case of poor perforamnce, this profiling will hopefully show (a) what changes can be made to Xarray to enable this usecase, or (b) how Xarray users interested in this use case can use Xarray's current abstractions to achieve acceptable performance for this problem. @@ -47,6 +47,10 @@ Results can be visualized using a flamegraph. - Run `memray flamegraph ` to convert memray output to a HTML representation - Run `python -m http.server 3000` to start an http server to view the file +### Profiling analysis + +See [`ANALYSIS.md`](./ANALYSIS.md). + ### Brainstorming - Could we use a [CacheStore from Zarr](https://zarr.readthedocs.io/en/stable/api/zarr/experimental/#zarr.experimental.cache_store.CacheStore) to load and cache repeatedly used chunks? 
diff --git a/pixi.lock b/pixi.lock index e3b3bc2..c1f0962 100644 --- a/pixi.lock +++ b/pixi.lock @@ -155,6 +155,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.4-h55fea9a_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.2-h35e630c_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/orc-2.3.0-h21090e2_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/orjson-3.11.9-py314h3b757c3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/p11-kit-0.26.2-h3435931_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pandas-3.0.3-py314hb4ffadd_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pango-1.56.4-hda50119_1.conda @@ -176,6 +177,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.2-h03e3b7b_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h366c992_103.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tornado-6.5.6-py314h5bd0f2a_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/viztracer-1.1.1-py314h5b51525_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/wayland-1.25.0-hd6090a7_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/websockets-16.0-py314h0f05182_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/wrapt-2.2.1-py314h5bd0f2a_0.conda @@ -242,6 +244,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.1.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.22.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/objprint-0.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.2-pyhc364b38_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/parso-0.8.7-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/partd-1.4.2-pyhd8ed1ab_0.conda @@ -315,6 +318,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.2-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mypy_extensions-1.1.0-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/narwhals-2.22.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/objprint-0.3.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.2-pyhc364b38_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/parso-0.8.7-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/partd-1.4.2-pyhd8ed1ab_0.conda @@ -465,6 +469,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openjpeg-2.5.4-hd9e9057_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.6.2-hd24854e_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/orc-2.3.0-hd11884d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/orjson-3.11.9-py314h4e27585_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pandas-3.0.3-py314he609de1_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pango-1.56.4-hf80efc4_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pcre2-10.47-h30297fc_0.conda @@ -484,6 +489,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/snappy-1.2.2-hada39a4_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h010d191_3.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tornado-6.5.6-py314h6c2aa35_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/viztracer-1.1.1-py314h0612a62_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/websockets-16.0-py314ha14b1ff_1.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/wrapt-2.2.1-py314h6c2aa35_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xorg-libxau-1.0.12-hc919400_1.conda @@ -2481,6 +2487,20 @@ packages: license_family: APACHE size: 1468651 timestamp: 1773230208923 + - conda: https://conda.anaconda.org/conda-forge/linux-64/orjson-3.11.9-py314h3b757c3_0.conda + sha256: 30b90323ac267e6eab086e730d22c6d48482a1ac8550f7a82afca5f1d197b72d + md5: 7edb3dac35f0a2c8bded8beaff867089 + depends: + - python + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 + - python_abi 3.14.* *_cp314 + constrains: + - __glibc >=2.17 + license: Apache-2.0 + license_family: APACHE + size: 366536 + timestamp: 1778694291974 - conda: https://conda.anaconda.org/conda-forge/linux-64/p11-kit-0.26.2-h3435931_0.conda sha256: f63962d24d81d4fafa15112c03cd5db1fddadd520fdb2ad7ec71a1689e8e694f md5: 312989f1b7318c3763fffdc78df8474e @@ -2819,6 +2839,21 @@ packages: license_family: Apache size: 914451 timestamp: 1779915938568 + - conda: https://conda.anaconda.org/conda-forge/linux-64/viztracer-1.1.1-py314h5b51525_0.conda + sha256: a726714617d8f48de1234ce14c9b39fdf141b571ab565469f15d7d2817fce1b7 + md5: bae6c6aef6505be0a9301d95622ef82c + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc + - libgcc-ng >=12 + - objprint >=0.3.0 + - orjson + - python >=3.14,<3.15.0a0 + - python_abi 3.14.* *_cp314 + license: Apache-2.0 + license_family: APACHE + size: 13082846 + timestamp: 1762827614672 - conda: https://conda.anaconda.org/conda-forge/linux-64/wayland-1.25.0-hd6090a7_0.conda sha256: ea374d57a8fcda281a0a89af0ee49a2c2e99cc4ac97cf2e2db7064e74e764bdb md5: 996583ea9c796e5b915f7d7580b51ea6 @@ -3556,6 +3591,15 @@ packages: license_family: MIT size: 285106 timestamp: 1780347345003 + - conda: https://conda.anaconda.org/conda-forge/noarch/objprint-0.3.0-pyhd8ed1ab_0.conda + sha256: ff58f788e9e8c74a6eb2f194b4c18e5bc39a0000172f0b1ec016afae637961f2 + md5: 8f8399ecb94bd96e0d73e02053525808 + depends: + - python >=3.9 + license: 
Apache-2.0 + license_family: APACHE + size: 38864 + timestamp: 1731340445369 - conda: https://conda.anaconda.org/conda-forge/noarch/packaging-26.2-pyhc364b38_0.conda sha256: 3906abfb6511a3bb309e39b9b1b7bc38f50a723971de2395489fd1f379255890 md5: 4c06a92e74452cfa53623a81592e8934 @@ -5467,6 +5511,20 @@ packages: license_family: APACHE size: 548180 timestamp: 1773230270828 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/orjson-3.11.9-py314h4e27585_0.conda + sha256: 43d31146626e076d8528d7fa8fe1cbf89b8cc18425834f7840152c746ecd4f63 + md5: 61a1cfd34bdc2d52404886cd229ea78c + depends: + - python + - __osx >=11.0 + - python 3.14.* *_cp314 + - python_abi 3.14.* *_cp314 + constrains: + - __osx >=11.0 + license: Apache-2.0 + license_family: APACHE + size: 333761 + timestamp: 1778694369655 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pandas-3.0.3-py314he609de1_0.conda sha256: 90d84a2a6e7e9826f28f71ff34c7daacd0819c96eb3951f1ab59ef460a75fb58 md5: 703276fc0e3693ff6a7566f1ac6865ab @@ -5766,6 +5824,20 @@ packages: license_family: Apache size: 916141 timestamp: 1779916422402 + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/viztracer-1.1.1-py314h0612a62_0.conda + sha256: 0b2ca942c740172e7b6dbab2339ae8821876b90d4cf449bb1ba5ed8b12a47d89 + md5: bce999062da67d099cc02056db29c22d + depends: + - __osx >=11.0 + - objprint >=0.3.0 + - orjson + - python >=3.14,<3.15.0a0 + - python >=3.14,<3.15.0a0 *_cp314 + - python_abi 3.14.* *_cp314 + license: Apache-2.0 + license_family: APACHE + size: 13059331 + timestamp: 1762828151599 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/websockets-16.0-py314ha14b1ff_1.conda sha256: 2d708c6173773cc4883582c216f0ab1e776155aa74116ef6092b7084c879e92b md5: 0cd5ebc7e220c79c6969ec15e82c741e diff --git a/pixi.toml b/pixi.toml index 81af879..e52f52c 100644 --- a/pixi.toml +++ b/pixi.toml @@ -16,6 +16,7 @@ graphviz = "*" snakeviz = ">=2.2.2,<3" memray = ">=1.19.3,<2" mypy = "*" +viztracer = ">=1.1.1,<2" [tasks] typecheck = { cmd 
= "mypy" } diff --git a/pyproject.toml b/pyproject.toml index be82807..ec6888b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,5 +11,6 @@ module = [ "dask.*", "zarr.*", "graphviz", + "viztracer", ] ignore_missing_imports = true diff --git a/saved_outputs/compare-for-xarray-folks/case1.prof b/saved_outputs/compare-for-xarray-folks/case1.prof new file mode 100644 index 0000000..e12821a Binary files /dev/null and b/saved_outputs/compare-for-xarray-folks/case1.prof differ diff --git a/saved_outputs/compare-for-xarray-folks/case2.prof b/saved_outputs/compare-for-xarray-folks/case2.prof new file mode 100644 index 0000000..a9eb288 Binary files /dev/null and b/saved_outputs/compare-for-xarray-folks/case2.prof differ diff --git a/saved_outputs/compare-for-xarray-folks/case3.prof b/saved_outputs/compare-for-xarray-folks/case3.prof new file mode 100644 index 0000000..cebd3e0 Binary files /dev/null and b/saved_outputs/compare-for-xarray-folks/case3.prof differ diff --git a/saved_outputs/compare-for-xarray-folks/case4.prof b/saved_outputs/compare-for-xarray-folks/case4.prof new file mode 100644 index 0000000..a82f3cc Binary files /dev/null and b/saved_outputs/compare-for-xarray-folks/case4.prof differ diff --git a/saved_outputs/compare-for-xarray-folks/case5.prof b/saved_outputs/compare-for-xarray-folks/case5.prof new file mode 100644 index 0000000..1ab35d6 Binary files /dev/null and b/saved_outputs/compare-for-xarray-folks/case5.prof differ diff --git a/saved_outputs/compare-for-xarray-folks/summary.json b/saved_outputs/compare-for-xarray-folks/summary.json new file mode 100644 index 0000000..1b765a7 --- /dev/null +++ b/saved_outputs/compare-for-xarray-folks/summary.json @@ -0,0 +1,34 @@ +{ + "test_cases": [ + { + "data": "Data(open_zarr_kwargs={'store': 'datasets/ds_2d_left_agrid_small.zarr', 'consolidated': False}, n_particles=100000, chunk_coverage=0.03, postprocess_ds=)", + "task": "single-interpolation", + "profiler": "profile_execution_time()", + "profile_path": 
"case1.prof" + }, + { + "data": "Data(open_zarr_kwargs={'store': 'datasets/ds_2d_left_agrid_small.zarr', 'consolidated': False}, n_particles=100000, chunk_coverage=0.03, postprocess_ds=None)", + "task": "load-then-single-interpolation", + "profiler": "profile_execution_time()", + "profile_path": "case2.prof" + }, + { + "data": "Data(open_zarr_kwargs={'store': 'datasets/ds_2d_left_agrid_small.zarr', 'consolidated': False}, n_particles=100000, chunk_coverage=0.03, postprocess_ds=None)", + "task": "single-interpolation", + "profiler": "profile_execution_time()", + "profile_path": "case3.prof" + }, + { + "data": "Data(open_zarr_kwargs={'store': 'datasets/ds_2d_left_agrid_small.zarr', 'consolidated': False}, n_particles=100000, chunk_coverage=0.03, postprocess_ds=None)", + "task": "triple-interpolation", + "profiler": "profile_execution_time()", + "profile_path": "case4.prof" + }, + { + "data": "Data(open_zarr_kwargs={'store': CacheStore(store=LocalStore('file://datasets/ds_2d_left_agrid_small.zarr'), cache_store=MemoryStore('memory://5351835584'), max_age_seconds=infinity, max_size=2097152, current_size=1456223, cached_keys=1), 'consolidated': False}, n_particles=100000, chunk_coverage=0.03, postprocess_ds=None)", + "task": "triple-interpolation", + "profiler": "profile_execution_time()", + "profile_path": "case5.prof" + } + ] +} diff --git a/xarray-profile.py b/xarray-profile.py index 52d8122..ea4ccdf 100644 --- a/xarray-profile.py +++ b/xarray-profile.py @@ -3,11 +3,13 @@ from pathlib import Path +import dask # noqa: F401 import zarr from zarr.abc.store import Store from contextlib import contextmanager import math import cProfile +import zarr.storage import memray from abc import ABC, abstractmethod from typing import Any @@ -16,9 +18,11 @@ import time import json -import zarr.storage +from viztracer import VizTracer from dataclasses import dataclass +# dask.config.set(scheduler="single-threaded") + # full dataset size is ~24Gb. 
To simulate particles occupying in-memory chunks (an assumption that will hold for Parcels), we set the coverage proportion to be aligned with our machine RAM # i.e., if our usable memory is 2Gb, coverage proportion should be less than 2/24 = 0.083 @@ -187,6 +191,14 @@ def run(self, ds: xr.Dataset, positions: xr.Dataset): ds.isel(positions).compute() +class LoadThenSingleInterpolation(Task): + name = "load-then-single-interpolation" + + def run(self, ds: xr.Dataset, positions: xr.Dataset): + ds = ds.load() + ds.isel(positions) + + class TripleInterpolation(Task): name = "triple-interpolation" @@ -226,6 +238,18 @@ def profile_memory(folder: Path, data: Data, task: Task) -> Path: return report +def run_viztracer(folder: Path, data: Data, task: Task) -> Path: + assert folder.is_dir() + assert folder.exists() + report = folder / f"viztracer_{task.name}_{get_current_time()}.json" + + with data.setup() as (ds, positions): + with open(report, "w") as f: + with VizTracer(output_file=f): + task.run(ds, positions) + return report + + @dataclass class Workspace: folder: Path @@ -295,13 +319,37 @@ def load_dataset(ds: xr.Dataset) -> xr.Dataset: return ds.load() Workspace( - folder=OUTPUT_FOLDER / "compare-load-vs-dask", + folder=OUTPUT_FOLDER / "compare-for-xarray-folks", test_cases=[ - (profile_execution_time, SingleInterpolation(), DEFAULT_DATA_SMALL), + # 1 - interp on already loaded data. Only profile interp ( profile_execution_time, SingleInterpolation(), DEFAULT_DATA_SMALL.then(postprocess_ds=load_dataset), ), + # 2 - interp on already loaded data. 
Profile load and interp + (profile_execution_time, LoadThenSingleInterpolation(), DEFAULT_DATA_SMALL), + # 3 - interp using dask (i.e., no pre-fetching) + (profile_execution_time, SingleInterpolation(), DEFAULT_DATA_SMALL), + # 4 - triple interp using dask (i.e., no pre-fetching) + (profile_execution_time, TripleInterpolation(), DEFAULT_DATA_SMALL), + # 5 - triple interp using dask with LRU cache Zarr Store + ( + profile_execution_time, + TripleInterpolation(), + Data( + { + "store": create_cache_store( + zarr.storage.LocalStore( + "datasets/ds_2d_left_agrid_small.zarr" + ), + 2 * ONE_GB, + ), + "consolidated": False, + }, + n_particles=N_PARTICLES, + chunk_coverage=DEFAULT_CHUNK_COVERAGE_PROP, + ), + ), ], ).run_test_cases()