diff --git a/.gitignore b/.gitignore index 499167d..b9aa7e0 100644 --- a/.gitignore +++ b/.gitignore @@ -249,3 +249,5 @@ multiColSplay/** singleColSplay/** sym symsEnumsSplay/** + +src/pykx/pykx_init.q_ diff --git a/LICENSE.txt b/LICENSE.txt index 831ab39..43e0d3b 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,7 @@ -This work is dual licensed under "Apache 2.0" and the "Software License for q.so", users are required to abide by the terms of both licenses in their entirety. The terms of these licenses are included below +All files contained within this repository are not covered by a single license. The following outlines the differences. + +1. All files and folders contained within the source code directory 'src/pykx/q.so/' are licensed under the terms of the 'Software License for q.so' which are included below +2. All other files within this repository are licensed under the "Apache 2.0" license include below *********************************************************************************** Apache 2.0 diff --git a/README.md b/README.md index ef86761..3ee08e5 100644 --- a/README.md +++ b/README.md @@ -95,9 +95,9 @@ PyKX depends on the following third-party Python packages: - `pandas>=1.2, <2.0; python_version=='3.8'` - `pandas>=1.2, <=2.2.3; python_version>'3.8'` -- `numpy~=1.22, <2.0; python_version<'3.11'` -- `numpy~=1.23, <2.0; python_version=='3.11'` -- `numpy~=1.26, <2.0; python_version=='3.12'` +- `numpy~=1.22; python_version<'3.11'` +- `numpy~=1.23; python_version=='3.11'` +- `numpy~=1.26; python_version>'3.11'` - `pytz>=2022.1` - `toml~=0.10.2` - `dill>=0.2.0` @@ -105,7 +105,7 @@ PyKX depends on the following third-party Python packages: They are installed automatically by `pip` when PyKX is installed. -PyKX also has an optional Python dependency of `pyarrow>=3.0.0`, which can be included by installing the `pyarrow` extra, e.g. 
`pip install pykx[pyarrow]` +PyKX also has an optional Python dependency of `pyarrow>=3.0.0, <19.0.0`, which can be included by installing the `pyarrow` extra, e.g. `pip install pykx[pyarrow]` When using PyKX with KX Dashboards users will be required to install `ast2json~=0.3` this can be installed using the `dashboards` extra, e.g. `pip install pykx[dashboards]` @@ -113,6 +113,8 @@ When using PyKX Streaming users may require the ability to stop processes initia When using Streamlit users will be required to install `streamlit~=1.28` this can be installed using the `streamlit` extra, e.g. `pip install pykx[streamlit]` +When attempting to convert data to/from PyTorch users will be required to install `torch>2.1` this can be installed using the `torch` extra, e.g. `pip install pykx[torch]` + **Warning:** Trying to use the `pa` conversion methods of `pykx.K` objects or the `pykx.toq.from_arrow` method when PyArrow is not installed (or could not be imported without error) will raise a `pykx.PyArrowUnavailable` exception. 
#### Optional Non-Python Dependencies diff --git a/conda-recipe/conda_build_config.yaml b/conda-recipe/conda_build_config.yaml index 0fde6e5..7d39e2b 100644 --- a/conda-recipe/conda_build_config.yaml +++ b/conda-recipe/conda_build_config.yaml @@ -4,3 +4,4 @@ python: - 3.10 - 3.11 - 3.12 + - 3.13 diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 1288c3f..15dd9de 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -18,18 +18,25 @@ requirements: - python - setuptools>=68.0 - setuptools_scm[toml]>=8.0.0 - - cython==3.0.0 - - numpy==1.26 # [py==312] - - numpy==1.22.* # [py<312] + - cython==3.0.* + - numpy==2.* # [py==313] + - numpy==2.0.* # [py==312] + - numpy==2.0.* # [py==311] + - numpy==2.0.* # [py==310] + - numpy==2.0.* # [py==39] + - numpy==1.22.* # [py==38] + - numpy==1.20.* # [py==37] - tomli>=2.0.1 - wheel>=0.36 - sysroot_linux-64 # [linux64] run: - python - - numpy>=1.22,<2.0 + - numpy>=1.20 # [py==37] + - numpy>=1.22 # [py>37] - pandas>=1.2, <=2.2.3 # [py>38] - pandas<2.0 # [py==38] + - pyarrow>=3.0.0, <19.0.0 - pytz>=2022.1 - toml>=0.10.2 - dill>=0.2.0 diff --git a/docs/api/pykx-execution/q.md b/docs/api/pykx-execution/q.md index f6395c3..4dba42e 100644 --- a/docs/api/pykx-execution/q.md +++ b/docs/api/pykx-execution/q.md @@ -112,7 +112,7 @@ pykx.List(pykx.q(' Restricted evaluation of a parse tree. -Behaves similar to [`eval`](#eval) except the evaluation is blocked from modifying values or global state. +Behaves similar to [`eval`](#eval) except the evaluation is blocked from modifying state for any handle context other than 0. 
```python >>> pykx.q.reval(pykx.q.parse(b'til 10')) diff --git a/docs/beta-features/index.md b/docs/beta-features/index.md index 47341b5..83e211b 100644 --- a/docs/beta-features/index.md +++ b/docs/beta-features/index.md @@ -1,8 +1,13 @@ +--- +title: PyKX Beta Features +description: PyKX features in beta status +date: January 2025 +author: KX Systems, Inc., +tags: PyKX, beta features, +--- # Beta Features -!!! "Note" - - There are currently no active features in beta status, the following page outlines broadly the concept of beta features within PyKX and how it is managed today +_This page provides an overview of PyKX Beta Features, including what they are, how to enable them, and what features are available._ ## What is a Beta Feature? @@ -12,14 +17,14 @@ Feedback on Beta Feature development is incredibly helpful and helps to determin ## How do I enable Beta Features? -Within PyKX beta features are enabled through the use of a configuration/environment variable `PYKX_BETA_FEATURES`, within a Python session users can set this prior to importing PyKX as shown below, note that when enabled you will be able to see what features are in beta through access of `kx.beta_features`: +Enable PyKX beta features using the `#!python PYKX_BETA_FEATURES` configuration/environment variable. Set this before importing PyKX in a Python session, as shown below, to view available beta features through `#!python kx.beta_features`: ```python >>> import os >>> os.environ['PYKX_BETA_FEATURES'] = 'True' >>> import pykx as kx >>> kx.beta_features -[] +['PyTorch Conversions'] ``` Alternatively you can set beta features to be available at all times by adding `PYKX_BETA_FEATURES` to your `.pykx-config` file as outlined [here](../user-guide/configuration.md#configuration-file). An example of a configuration making use of this is as follows: @@ -34,12 +39,12 @@ PYKX_BETA_FEATURES='true' ## What Beta Features are available? 
-As mentioned above the list of available features to a user is contained within the `beta_features` property, for users with these features available you can get access to this information as follows within a Python session +As mentioned above, the `beta_features` property contains the list of available features. You can retrieve this information in a Python session as follows: ```python >>> import pykx as kx >>> kx.beta_features -[] +['PyTorch Conversions'] ``` -There are currently no active features in beta status. This page will be updated when new beta features are added at a future point in time. +1. [`PyTorch Conversions`](torch.md): Allow users to convert numeric type PyKX vectors and N-Dimensional lists to PyTorch Tensor objects. diff --git a/docs/beta-features/torch.md b/docs/beta-features/torch.md new file mode 100644 index 0000000..cd0ef87 --- /dev/null +++ b/docs/beta-features/torch.md @@ -0,0 +1,110 @@ +--- +title: PyTorch Conversions +description: PyTorch Tensor Conversions beta feature in PyKX +date: January 2025 +author: KX Systems, Inc., +tags: PyKX, PyTorch Tensor +--- +# PyTorch Conversions + +_This page provides an overview of PyTorch Conversions, a beta feature in PyKX._ + +!!! Warning + + This functionality is provided as a Beta Feature and is subject to change. To enable this functionality for testing please follow the configuration instructions [here](../user-guide/configuration.md) setting `PYKX_BETA_FEATURES='true'`. + +## Introduction + +Commonly used in the development of complex machine learning algorithms, PyTorch is a machine learning library based on the Torch library and is used in applications such as computer vision and natural language processing. Originally developed by Meta AI it is now widely used in the open-source community for algorithm development. + +This beta feature allows PyKX users to convert PyKX Vector/List objects into their PyTorch [Tensor](https://pytorch.org/docs/stable/tensors.html) equivalents. 
+ +## Requirements and limitations + + +Before you run this functionality, first you must install `torch>2.1` in your local Python session, by using the following command: + +```bash +pip install pykx[torch] +``` + +## Functional walkthrough + +This walkthrough demonstrates the following steps: + +1. Convert a PyKX Vector object to a Tensor object. +1. Convert a PyKX List object to a Tensor object. +1. Convert a Tensor object to a PyKX equivalent object. + +### Vector to Tensor + +Use the `*.pt()` methods to convert PyKX numeric data representations to Tensor objects. In the example below we convert PyKX numeric types to their PyTorch Tensor equivalents: + +```python +>>> import os +>>> os.environ['PYKX_BETA_FEATURES'] = 'True' +>>> import pykx as kx +>>> svec = kx.q('1 2 3h') +>>> lvec = kx.q('1 2 3j') +>>> rvec = kx.q('1 2 3e') +>>> fvec = kx.q('1 2 3f') +>>> svec.pt() +tensor([1, 2, 3], dtype=torch.int16) +>>> lvec.pt() +tensor([1, 2, 3]) +>>> rvec.pt() +tensor([1., 2., 3.]) +>>> fvec.pt() +tensor([1., 2., 3.], dtype=torch.float64) +``` + +In particular note in the above that the data types are converted to their Tensor size equivalent. + +### List to Tensor + +To convert PyKX List objects to Tensors, two criteria must be met: + +1. The `#!python pykx.List` contains only data of a single type. +1. The `#!python pykx.List` is an N-Dimensional regularly shaped/rectangular structure. 
+ +By default conversions to a `#!python torch.Tensor` object test for these criteria and it throws an error if they are not met as follows: + +```python +>>> import os +>>> os.environ['PYKX_BETA_FEATURES'] = 'True' +>>> import pykx as kx +>>> kx.q('(1 2;2 3f)').pt() +TypeError: Data must be a singular type "rectangular" matrix +``` + +A working example of this is as follows: + +```python +>>> kx.q('100 100 100#1000000?1f').pt() +tensor([[[0.3928, 0.5171, 0.5160, ..., 0.3410, 0.8618, 0.5549], + [0.0617, 0.2858, 0.6685, ..., 0.9234, 0.4016, 0.5619], + [0.7249, 0.8112, 0.2087, ..., 0.3187, 0.1873, 0.8416], + ..., + dtype=torch.float64) +``` + +Having to pre-compute the shape of the data can slow down the processing of large matrices. To avoid this, if you already know the final shape of the tensor, you can specify it using the `#!python reshape` keyword in advance. + +```python +>>> kx.q('100 100 100#1000000?1f').pt(reshape=[100, 100, 100]) +tensor([[[0.3928, 0.5171, 0.5160, ..., 0.3410, 0.8618, 0.5549], + [0.0617, 0.2858, 0.6685, ..., 0.9234, 0.4016, 0.5619], + [0.7249, 0.8112, 0.2087, ..., 0.3187, 0.1873, 0.8416], + ..., + dtype=torch.float64) +``` + +While not clear from the above for particularly complex nested `#!python pykx.List` objects setting the data shape can provide significant performance boosts: + +```python +lst = kx.q('100 100 100 100#100000000?1f') +%timeit lst.pt() +# 1.22 s ± 24.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) +%timeit lst.pt(reshape=[100, 100, 100, 100]) +# 265 ms ± 4.96 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) +``` diff --git a/docs/examples/jupyter-integration.ipynb b/docs/examples/jupyter-integration.ipynb index ddc8dfd..6de2326 100644 --- a/docs/examples/jupyter-integration.ipynb +++ b/docs/examples/jupyter-integration.ipynb @@ -19,7 +19,9 @@ "1. [Execute against Embedded q](#3-execute-against-embedded-q)\n", "1. [SQL interface](#4-sql-interface)\n", "1. [q namespaces](#5-q-namespaces)\n", - "1. 
[(Advanced) q over IPC](#6-advanced-q-over-ipc)" + "1. [q over IPC](#6-q-over-ipc)\n", + "1. [q first mode](#7-q-first-mode)\n", + "1. [Saving code blocks](#8-saving-code-blocks)" ] }, { @@ -157,94 +159,6 @@ "([] a: 1 2 3)" ] }, - { - "cell_type": "markdown", - "id": "cda0d38a", - "metadata": {}, - "source": [ - "#### Executing against an external q process over IPC\n", - "\n", - "Connection information can also be included after the `%%q` to connect to a remote `q` process over\n", - "IPC.\n", - "\n", - "Here is the list of currently supported connection parameters.\n", - "If they specify a type a second value is expected to follow them to be used as the parameter.\n", - "If no type follows them they can be used as a stand alone flag.\n", - "\n", - "```\n", - "--host: A string object denoting the host to connect to\n", - "--port: An int object denoting the port to connect over\n", - "--user: A str object denoting the username to use when connecting\n", - "--password: A str object denoting the password to use when connecting\n", - "--timeout: A float object denoting the time in seconds before the query\n", - " times out, defaults to no timeout\n", - "--nolarge: Disable messages over 2GB being sent / received\n", - "--tls: Use a tls connection\n", - "--unix: Use a unix connection\n", - "--reconnection_attempts: An int object denoting how many\n", - " reconnection attempts to make\n", - "--noctx: Disable the context interface\n", - "```\n", - "\n", - "Connect to a q server running on `localhost` at port `5001` as `user` using password `password`\n", - "and disable the context interface." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1faca1e1", - "metadata": {}, - "outputs": [], - "source": [ - "%%q --host localhost --port 5000 --user user --pass password --noctx\n", - "til 10" - ] - }, - { - "cell_type": "markdown", - "id": "f046ebb6", - "metadata": {}, - "source": [ - "All connection arguments are optional with the exception of the `--port` argument. If `--host` is not provided `localhost` will be used as the default host." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "615d7d2e", - "metadata": {}, - "outputs": [], - "source": [ - "%%q --port 5000\n", - "tab:([]a:1000?1000; b:1000?500.0; c:1000?`AAPL`MSFT`GOOG);" - ] - }, - { - "cell_type": "markdown", - "id": "d756f342", - "metadata": {}, - "source": [ - "It is possible to execute `q` code spanning multiple lines." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c739a80a", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "%%q --port 5000\n", - "afunc: {[x; y]\n", - " x + y \n", - " };\n", - "afunc[0; 1]\n", - "afunc[2; 3]" - ] - }, { "cell_type": "markdown", "id": "2905895e", @@ -313,7 +227,7 @@ "id": "52ca850e", "metadata": {}, "source": [ - "## 6. (Advanced) q over IPC\n", + "## 6. q over IPC\n", "\n", "After `%%q` you can include connection information, if you wish to connect to a remote `q` process over IPC. \n", "\n", @@ -386,7 +300,7 @@ "source": [ "%%q --port 5000\n", "afunc: {[x; y]\n", - " x + y \n", + " x + y\n", " };\n", "afunc[0; 1]\n", "afunc[2; 3]" @@ -408,7 +322,7 @@ "id": "607997ac-e7d7-4cc9-a06f-aa1cd3d742ce", "metadata": {}, "source": [ - "#### q first mode\n", + "## 7. q first mode\n", "q first mode can be enabled by importing PyKX after setting the environment variable `PYKX_JUPYTERQ` to `true`, or at runtime use:" ] }, @@ -533,7 +447,7 @@ "id": "080dd085-e54f-478c-a305-eac9f23db020", "metadata": {}, "source": [ - "#### Saving code blocks\n", + "## 8. 
Saving code blocks\n", "The `--save` feature allows user to save code in a cell as a q file.\n", "\n", "To use this feature, include `--save` followed by the `path` of the file.\n", diff --git a/docs/examples/server/server.md b/docs/examples/server/server.md index 4e34eb8..1c29d5f 100644 --- a/docs/examples/server/server.md +++ b/docs/examples/server/server.md @@ -88,3 +88,7 @@ a float as input and the value denotes how often the server will attempt to clea By default the value is `#!python 0.0` and this will cause the list of connections to be cleaned on every call to `#!python poll_recv`. With lots of incoming connections, this can deteriorate the performance. If you set the `#!python conn_gc_time` to `#!python 10.0` then this clean-up happens every 10 seconds. + +!!! Note + + [reval](../../api/pykx-execution/q.md#reval) will not impose read only exection on a PyKX server as Python manages the sockets rather than `q`. \ No newline at end of file diff --git a/docs/getting-started/installing.md b/docs/getting-started/installing.md index 168492a..4b8590b 100644 --- a/docs/getting-started/installing.md +++ b/docs/getting-started/installing.md @@ -9,11 +9,19 @@ tags: PyKX, setup, install, _This page explains how to install PyKX on your machine._ +!!! License + + PyKX is released under a dual license covering the files within the [PyKX repository](https://github.com/kxsystems/pykx) as outlined [here](../license.md). + + **Acceptance of license terms:** + + By downloading, installing, or using PyKX, you acknowledge and agree that you have read, understood, and accept the license [link](../license.md) and will adhere to its terms. 
+ ## Pre-requisites Before you start, make sure you have: -- [**Python**](https://www.python.org/downloads/) (versions 3.8-3.12) +- [**Python**](https://www.python.org/downloads/) (versions 3.8-3.13) - [**pip**](https://pypi.org/project/pip/) Recommended: a virtual environment with packages such as [venv](https://docs.python.org/3/library/venv.html) from the standard library. @@ -22,9 +30,9 @@ Recommended: a virtual environment with packages such as [venv](https://docs.pyt KX only supports versions of PyKX built by KX (installed from wheel files) for: -- **Linux** (`manylinux_2_17_x86_64`, `linux-arm64`) with CPython 3.8-3.12 -- **macOS** (`macosx_10_10_x86_64`, `macosx_10_10_arm`) with CPython 3.8-3.12 -- **Windows** (`win_amd64`) with CPython 3.8-3.12 +- **Linux** (`manylinux_2_17_x86_64`, `linux-arm64`) with CPython 3.8-3.13 +- **macOS** (`macosx_10_10_x86_64`, `macosx_10_10_arm`) with CPython 3.8-3.13 +- **Windows** (`win_amd64`) with CPython 3.8-3.13 We provide assistance to user-built installations of PyKX only on a best-effort basis. @@ -32,6 +40,10 @@ We provide assistance to user-built installations of PyKX only on a best-effort You can install PyKX from three sources: +!!! Note "Installing in air-capped environments" + + If you are installing in a location without internet connection you may find [this section](#installing-in-an-air-gapped-environment) useful. + === "Install PyKX from PyPI" Ensure you have a recent version of `#!bash pip`: @@ -256,9 +268,9 @@ This command should display the installed version of PyKX. 
- `pandas>=1.2, <2.0; python_version=='3.8'` - `pandas>=1.2, <=2.2.3; python_version>'3.8'` - - `numpy~=1.22, <2.0; python_version<'3.11'` - - `numpy~=1.23, <2.0; python_version=='3.11'` - - `numpy~=1.26, <2.0; python_version=='3.12'` + - `numpy~=1.22; python_version<'3.11'` + - `numpy~=1.23; python_version=='3.11'` + - `numpy~=1.26; python_version>='3.12'` - `pytz>=2022.1` - `toml~=0.10.2` - `dill>=0.2.0` @@ -279,13 +291,14 @@ This command should display the installed version of PyKX. **Optional Python dependencies:** - - **`pyarrow >=3.0.0`**: install `pyarrow` extra, for example `pip install pykx[pyarrow]`. + - **`pyarrow >=3.0.0, <19.0.0`**: install `pyarrow` extra, for example `pip install pykx[pyarrow]`. - **`find-libpython ~=0.2`**: install `debug` extra, for example `pip install pykx[debug]`. - **`ast2json ~=0.3`**: install with `dashboards` extra, for example `pip install pykx[dashboards]` - **`dill >=0.2`**: install via pip, with `remote` extra, for example `pip install pykx[remote]` - **`beautifulsoup4 >=4.10.0`**: install with `help` extra, for example `pip install pykx[help]` - **`markdown2 >=2.5.0`**: install with `help` extra, for example `pip install pykx[help]` - **`psutil >=5.0.0`**: install via pip, with `streaming` extra, for example `pip install pykx[streaming]` + - **`torch >2.1`**: install via pip, with `torch` extra, for example `pip install pykx[torch]` Here's a breakdown of how PyKX uses these libraries: @@ -293,6 +306,7 @@ This command should display the installed version of PyKX. - [find-libpython](https://pypi.org/project/find-libpython): provides the `libpython.{so|dll|dylib}` file required by [PyKX under q](../pykx-under-q/intro.md). - [ast2json](https://pypi.org/project/ast2json/): required for KX Dashboards Direct integration. 
- [psutil](https://pypi.org/project/psutil/): facilitates the stopping and killing of a q process on a specified port allowing for orphaned q processes to be stopped, functionality defined [here](../api/util.md#pykxutilkill_q_process). + - [torch](https://pytorch.org/docs/stable/): required for conversions between `#!python torch.Tensor` objects and their PyKX equivalents. **Optional non-Python dependencies:** @@ -328,9 +342,9 @@ If however you need to make use of the [Real-Time Capture](../user-guide/advance By default when attempting to start a q process for use within the Real-Time Capture workflows PyKX will attempt to call `q` directly, this method however is not fully reliable when using the Python `subprocess` module. As such the following setup can be completed to point more explicitly at your executable. -If you already have a q executable PyKX can use this when initializing the Real-Time Capture APIs through the setting of the following in you [configuration file](../user-guide/configuration.md#configuration-file) or as [environment variables](../user-guide/configuration.md#environment-variables) +If you already have a q executable, PyKX can use this when initializing the Real-Time Capture APIs through the setting of the following in your [configuration file](../user-guide/configuration.md#configuration-file) or as [environment variables](../user-guide/configuration.md#environment-variables): -| Variable | Explanation | +| **Variable** | **Explanation** | | :------------------ | :--------------------------------------------------------------------------------------------------------------- | | `PYKX_Q_EXECUTABLE` | Specifies the location of the q executable which should be called. 
Typically this will be `QHOME/[lmw]64/q[.exe]`| | `QHOME` | The directory to which q was installed | @@ -343,7 +357,7 @@ For users who do not have access to a q executable, PyKX provides a utility func The following default information is used when installing the q executable: -| Parameter | Default | Explanation | +| **Parameter** | **Default** | **Explanation** | | :--------------- | :------------------ | :--------------------------------------------------------------------------------------------------------------------- | | location | `'~/q'` or `'C:\q'` | The location to which q will be installed if not otherwise specified. | | date | `'2024.07.08'` | The dated version of kdb+ 4.0 which is to be installed. | @@ -368,6 +382,40 @@ Installation of q via this method will update the configuration file `.pykx-conf The installed q executable is not required to be installed via PyKX. If you wish to install q following the traditional approach you can follow the install instructions outlined [here](https://code.kx.com/q/learn/install/) or through signing up for a free-trial [here](https://kx.com/download-kdb/). +### Installing in an air-gapped environment + +Installing Python libraries in air-gapped environments requires users to first download the [Python wheel](https://realpython.com/python-wheels/) files for the libraries you need to install. + +!!! Note "Build using the same environment as you're installing" + + When downloading the `.whl` files and dependencies make sure you are using the same OS and Python version as you will be when installing in your isolated environment + +In the case of PyKX users can in a internet enabled environment either + +1. Download the `.whl` file for the OS, library version and Python version you are intending to use on the air-gapped environment. These files can be sourced from [here](https://pypi.org/project/pykx/#files). +2. Generate the `.whl` file from a git clone of the [PyKX repository](https://github.com/kxsystems/pykx). 
An example of this is as follows: + + ```bash + $ git clone https://github.com/kxsystems/pykx + $ cd pykx + $ pip install build + # The below will install the `.whl` to a `dist/` folder + $ python -m build . + ``` +Once locally downloaded the dependencies of the `*.whl` file can be downloaded as follows: + + ```bash + $ pip download dist/*.whl + ``` + +Copy the content of your `dist/` folder to an external storage device (USB-key etc.) and upload the `.whl` files to your air-gapped device. + +Install the wheels which for simplicity are stored at a location `/opt/airgap/wheels` + +```bash +pip install --no-cache /opt/airgap/wheels/* +``` + ### Verify PyKX can use the executable Verifying that PyKX has access to the executable can be done through execution of the function `#!python kx.util.start_q_subprocess` and requires either your configuration file or environment variables to include `PYKX_Q_EXECUTABLE`. This is outlined [here](#configuring-pykx-to-use-an-existing-executable). diff --git a/docs/help/troubleshooting.md b/docs/help/troubleshooting.md index 74cff45..b67d318 100644 --- a/docs/help/troubleshooting.md +++ b/docs/help/troubleshooting.md @@ -166,3 +166,48 @@ The following section outlines how a user can get access to a verbose set of env which q: /usr/local/anaconda3/bin/q q info: ``` + +## Development issues + +### Debugging q code issues + +If you are developing a library of q code, by default PyKX does not provide the full backtrace on error. 
As an example assume you have developed a function and pass it an incorrect input
+All files contained within the [PyKX repository](https://github.com/kxsystems/pykx) are not covered by a single license. The following outlines the differences. + +1. All files and folders contained within the source code directory 'src/pykx/q.so/' are licensed under the terms of the 'Software License for q.so' which are included below +2. All other files within this repository are licensed under the "Apache 2.0" license include below ## Apache 2 License diff --git a/docs/pykx-under-q/api.md b/docs/pykx-under-q/api.md index 20a3d9e..673b49e 100644 --- a/docs/pykx-under-q/api.md +++ b/docs/pykx-under-q/api.md @@ -65,6 +65,7 @@ q)\l pykx.q [pyeval evaluate a string as Python code returning a foreign object](#pykxpyeval) [qeval evaluate a string as Python code returning a q object](#pykxqeval) [pyexec execute a string as Python code in Python memory](#pykxpyexec) +[typepy determine the target Python datatype of an object passed returning as a string](#pykxtypepy) **Python Library Integration:** [import import a Python library and store as a wrapped foreign object](#pykximport) @@ -446,6 +447,17 @@ q)show b:a`. 
foreign q).pykx.toq b 2 + +// Convert a PyKX conversion object back to q +q).pykx.toq .pykx.topd ([]5?1f;5?`a`b`c) + +x x1 +------------ +0.3017723 a +0.785033 a +0.5347096 c +0.7111716 b +0.411597 c ``` ## `.pykx.pycallable` @@ -711,12 +723,6 @@ q).pykx.safeReimport {system"q child.q"} "Hello World" ``` -**Parameter:** - -|Name|Type|Description| -|---|---|---| -|x||| - ## `.pykx.set` @@ -917,12 +923,12 @@ q).pykx.todefault til 10 enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9] // Pass a q list to Python treating the Python object as PyKX default -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault (til 10;til 10) - +q).pykx.typepy .pykx.todefault (til 10;til 10) +"" // Pass a q Table to Python by default treating the Python table as a Pandas DataFrame -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault ([]til 10;til 10) - +q).pykx.typepy .pykx.todefault ([]til 10;til 10) +"" ``` ## `.pykx.tok` @@ -952,12 +958,12 @@ q).pykx.tok til 10 enlist[`..k;;][0 1 2 3 4 5 6 7 8 9] // Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - +q).pykx.typepy til 10 +"" // Pass a q object to Python treating the Python object as a PyKX object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tok til 10 - +q).pykx.typepy .pykx.tok til 10 +"" ``` ## `.pykx.tonp` @@ -990,12 +996,12 @@ enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9] q).pykx.util.defaultConv:"py" // Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - +q).pykx.typepy til 10 +"" // Pass a q object to Python treating the Python object as a Numpy Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tonp til 10 - +q).pykx.typepy .pykx.tonp til 10 +"" ``` ## `.pykx.topa` @@ -1025,12 +1031,12 @@ q).pykx.topa til 10 enlist[`..pyarrow;;][0 1 2 3 4 5 6 7 8 9] // Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - 
+q).pykx.typepy til 10 +"" // Pass a q object to Python treating the Python object as a PyArrow Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topa til 10 - +q).pykx.typepy .pykx.topa til 10 +"" ``` ## `.pykx.topd` @@ -1061,12 +1067,50 @@ enlist[`..pandas;;][0 1 2 3 4 5 6 7 8 9] // Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - +q).pykx.typepy til 10 +"" // Pass a q object to Python treating the Python object as a Pandas Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topd til 10 - +q).pykx.typepy .pykx.topd til 10 +"" +``` + +## `.pykx.topt` + + +_Tag a q object to be indicate conversion to a PyTorch object when called in Python (BETA)_ + +```q +.pykx.topt[qObject] +``` + +**Parameters:** + +name | type | description | +----------|---------|-------------| +`qObject` | `any` | A q object which is to be defined as a PyTorch object in Python. | + +**Return:** + +type | description +-------------|------------ +`projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a Torch type object. 
| + +```q +// Denote that a q object once passed to Python should be managed as a Numpy object +q).pykx.topt til 10 +enlist[`..torch;;][0 1 2 3 4 5 6 7 8 9] + +// Update the default conversion type to be non numpy +q).pykx.setdefault"pt" + +// Pass a q object to Python with default conversions and return type +q).pykx.typepy til 10 +"" + +// Pass a q object to Python treating the Python object as a Numpy Object +q).pykx.typepy .pykx.tonp til 10 +"" ``` ## `.pykx.topy` @@ -1096,12 +1140,12 @@ q).pykx.topy til 10 enlist[`..python;;][0 1 2 3 4 5 6 7 8 9] // Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - +q).pykx.typepy til 10 +"" // Pass a q object to Python treating the Python object as a Python Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topy til 10 - +q).pykx.typepy .pykx.topy til 10 +"" ``` ## `.pykx.toq0` @@ -1175,12 +1219,47 @@ q).pykx.toraw til 10 enlist[`..raw;;][0 1 2 3 4 5 6 7 8 9] // Pass a q object to Python with default conversions and return type -q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 - +q).pykx.typepy til 10 +"" // Pass a q object to Python treating the Python object as a raw Object -q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.toraw til 10 - +q).pykx.typepy .pykx.toraw til 10 +"" +``` + +## `.pykx.typepy` + + +_Determine the datatype of an object passed to Python and return it as a string_ + +```q +.pykx.typepy[object] +``` + +**Parameters:** + +name | type | description +-----------|----------|------------- +`object` | `any` | An object that is passed to python and its datatype determined. 
+ +**Returns:** + +type | description +---------|------------ +`string` | The string representation of an objects datatype after being passed to python + +**Example:** + +```q +q)\l pykx.q +q).pykx.typepy 1 +"" + +q).pykx.typepy (10?1f;10?1f) +"" + +q).pykx.typepy ([]100?1f;100?1f) +"" ``` ## `.pykx.unwrap` @@ -1534,15 +1613,6 @@ q).pykx.dash.runFunction["import numpy as np\n\ndef func(x):\n\treturn np.linspa 0 2.5 5 7.5 10 ``` -**Parameters:** - -|Name|Type|Description| -|---|---|---| -|pyCode||| -|args||| - - - ## `.pykx.dash.util.getFunction` @@ -1569,9 +1639,3 @@ type | description | q).pykx.dash.util.getFunction["def func(x):\n\treturn 1"] {[f;x].pykx.util.pykx[f;x]}[foreign]enlist ``` - -**Parameter:** - -|Name|Type|Description| -|---|---|---| -|pyCode||| diff --git a/docs/pykx-under-q/intro.md b/docs/pykx-under-q/intro.md index c02b7c5..72a2efd 100644 --- a/docs/pykx-under-q/intro.md +++ b/docs/pykx-under-q/intro.md @@ -486,17 +486,17 @@ func[>;arg] / equivalent #### Function argument types -PyKX supports data type conversions between q and Python for Python native objects, Numpy objects, Pandas objects, PyArrow objects, and PyKX objects. +PyKX supports data type conversions between q and Python for Python native objects, NumPy objects, Pandas objects, PyArrow objects, and PyKX objects. -By default, when passing a q object to a callable function, it's converted to the most "natural" analogous type, as detailed below: +By default, when passing a q object to a callable function, it's converted to the most "natural" analogous type, as detailed below: - PyKX/q generic list objects become Python lists. - PyKX/q table/keyed table objects become Pandas equivalent DataFrames. -- All other PyKX/q objects become their analogous numpy equivalent types. +- All other PyKX/q objects become their analogous NumPy equivalent types. !!! Warning - Prior to PyKX 2.1.0, all conversions from q objects to Python would convert to their Numpy equivalent. 
To achieve this now, set the environment variable `PYKX_DEFAULT_CONVERSION="np"` + Prior to PyKX 2.1.0, all conversions from q objects to Python would convert to their NumPy equivalent. To achieve this now, set the environment variable `PYKX_DEFAULT_CONVERSION="np"` For function/method calls, control the default behavior of the conversions by setting `#!python .pykx.util.defaultConv`: @@ -506,70 +506,68 @@ q).pykx.util.defaultConv ``` You can apply one of the following values: -|**Python type**|Default|Python|Numpy|Pandas|PyArrow|PyKX| +|**Python type**|Default|Python|NumPy|Pandas|PyArrow|PyKX| |---------------|-------|------|-----|------|-------|----| |**Value**: |"default"|"py"|"np"|"pd"|"pa"|"k"| -In the example below, we start with Numpy and update the default types across all function calls: +In the example below, we start with NumPy and update the default types across all function calls: -=== "Numpy" +=== "NumPy" ```q - q)typeFunc:.pykx.eval"lambda x:print(type(x))" - q)typeFunc 1; - - q)typeFunc til 10; - - q)typeFunc (10?1f;10?1f) - - q)typeFunc ([]100?1f;100?1f); - + q).pykx.typepy 1; + "" + q).pykx.typepy til 10; + "" + q).pykx.typepy (10?1f;10?1f) + "" + q).pykx.typepy ([]100?1f;100?1f); + "" ``` === "Python" ```q - q)typeFunc:.pykx.eval"lambda x:print(type(x))" q).pykx.util.defaultConv:"py" - q)typeFunc 1; - - q)typeFunc til 10; - - q)typeFunc ([]100?1f;100?1f); - + q).pykx.typepy 1; + "" + q).pykx.typepy til 10; + "" + q).pykx.typepy ([]100?1f;100?1f); + "" ``` === "Pandas" ```q q).pykx.util.defaultConv:"pd" - q)typeFunc 1; - - q)typeFunc til 10; - - q)typeFunc ([]100?1f;100?1f); - + q).pykx.typepy 1; + "" + q).pykx.typepy til 10; + "" + q).pykx.typepy ([]100?1f;100?1f); + "" ``` === "PyArrow" ```q q).pykx.util.defaultConv:"pa" - q)typeFunc 1; - - q)typeFunc til 10; - - q)typeFunc ([]100?1f;100?1f); - + q).pykx.typepy 1; + "" + q).pykx.typepy til 10; + "" + q).pykx.typepy ([]100?1f;100?1f); + "" ``` === "PyKX" ```q q).pykx.util.defaultConv:"k" - 
q)typeFunc 1; - - q)typeFunc til 10; - - q)typeFunc ([]100?1f;100?1f); - + q).pykx.typepy 1; + "" + q).pykx.typepy til 10; + "" + q).pykx.typepy ([]100?1f;100?1f); + "" ``` Alternatively, to modify individual arguments to functions, use the `#!python .pykx.to*` functionality: diff --git a/docs/pykx-under-q/upgrade.md b/docs/pykx-under-q/upgrade.md index 3d0d0c7..edc3ac4 100644 --- a/docs/pykx-under-q/upgrade.md +++ b/docs/pykx-under-q/upgrade.md @@ -148,7 +148,8 @@ q)pyfunc `a x1| 0.5592623 x2| 0.486176 ``` + ## Next steps -- Learn[How to use PyKX within q](../pykx-under-q/intro.md). +- Learn [how to use PyKX within q](../pykx-under-q/intro.md). - Use the [pykx.q Library Reference Card](../pykx-under-q/api.md). \ No newline at end of file diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md index e786ada..d4aa1cc 100644 --- a/docs/release-notes/changelog.md +++ b/docs/release-notes/changelog.md @@ -4,6 +4,339 @@ The changelog presented here outlines changes to PyKX when operating within a Python environment specifically, if you require changelogs associated with PyKX operating under a q environment see [here](./underq-changelog.md). +## PyKX 3.1.0 + +#### Release Date + +2025-02-11 + +### Additions + +- Added support for `Python 3.13`. +- Added support for `NumPy 2.0` +- Added `kx.Lambda.value` property to return the value of the Lambda object, specific details on return values are available [here](https://code.kx.com/q/ref/value/#lambda). +- Added `kx.Lambda.string` property which returns the string of the Lambda function unaffected by console size as the inbuilt `str(Lambda)` method is. +- Added ability for user to explicitly create Lambda objects from strings. 
+ + ```python + >>> import pykx as kx + >>> kx.Lambda('{1+1}') + pykx.Lambda(pykx.q('{1+1}')) + ``` + +- When using the `kx.tick` module users initializing a real-time processor (RTP) or historical database (HDB) can now provide the `tables` parameter at startup to allow definition of table schemas for derived data at initialization or using the `set_tables` method following process start. + + ```python + >>> import pykx as kx + >>> prices = kx.schema.builder({ + ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom, + ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom, + ... 'px': kx.FloatAtom}) + >>> rte = kx.tick.RTP(port=5034, + ... subscriptions = ['trade', 'quote'], + ... tables = {'price': prices}, + ... vanilla=False) + >>> rte('price') + pykx.Table(pykx.q(' + time sym exchange sz px + ----------------------- + ')) + >>> rte.set_tables({'px': prices}) + >>> rte('px') + pykx.Table(pykx.q(' + time sym exchange sz px + ----------------------- + ')) + ``` + +- Addition of `reshape` keyword to the `.np()` method of `kx.List` objects. This can provide two benefits: + + 1. Conversions of `kx.List` objects to NumPy by default produce an array of NumPy arrays rather than an N-Dimensional NumPy array. Setting `reshape=True` when handling N-Dimensional rectangular lists allows the shape to be pre-processed prior to conversion and a more natural N-Dimensional NumPy array to be generated. + + === "Default conversion" + + ```python + >>> kx.q('2 2#4?1f').np() + array([ + array([0.47078825, 0.63467162]), + array([0.96723983, 0.23063848]) + ], + dtype=object) + ``` + + === "Using reshape=True" + + ```python + >>> kx.q('2 2#4?1f').np(reshape=True) + array([[0.94997503, 0.43908099], + [0.57590514, 0.59190043]]) + ``` + + 2. 
Provide a performance boost when converting regularly shaped (rectangular) N-Dimensional lists of uniform type when the shape of the resulting numpy array is known prior to conversion + + ```python + >>> import pykx as kx + >>> lst = kx.q('100000 100 10#100000000?1f') + >>> %timeit lst.np() + 9.72 s ± 272 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + >>> %timeit lst.np(reshape=[100000, 100, 10]) + 883ms ± 19.8 ms per loop (mean ± std. dev. of 7 runs, 1 loops each) + ``` + +- Added support for the creation and management of splayed format databases when using the `#!python pykx.DB` class. + + ```python + >>> import pykx as kx + >>> db = kx.DB(path='/tmp/splay') + >>> tab = kx.Table(data={ + ... 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), + ... 'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'), + ... 'p': kx.q('101 102 101.5 102.5'), + ... 'sz': kx.q('100 200 150 210'), + ... 'sym': kx.q('`a`b`b`c') + ... }) + >>> db.create(tab, 'trade', format='splayed') + >>> db.list_columns('trade') + ['date', 'ti', 'p', 'sz', 'sym'] + >>> db.rename_columns('trade', 'p', 'price') + 2025.01.28 11:18:54 renaming p to price in `:/tmp/splay/trade + >>> db.list_columns('trade') + ['date', 'ti', 'price', 'sz', 'sym'] + ``` + +- Addition of `.copy()` method for all `pykx` objects allowing users to modify copied objects without interfering with the original object. + + ```python + >>> import pykx as kx + >>> vec = kx.q.til(5) + >>> as_vec = vec + >>> cp_vec = vec.copy() + >>> vec[1] = 20 + >>> vec + pykx.LongVector(pykx.q('0 20 2 3 4')) + >>> as_vec + pykx.LongVector(pykx.q('0 20 2 3 4')) + >>> cp_vec + pykx.LongVector(pykx.q('0 1 2 3 4')) + ``` + +- IPC file execution logic now allows execution of `.py` files on remote servers by executing using the Python [`exec`](https://docs.python.org/3/library/functions.html#exec) function if PyKX is loaded on the remote server. + + ```python + >>> import pykx as kx + >>> with kx.SyncQConnection(port=5050) as q: + ... 
q.file_execute('./file.py') + ``` + +- Added `async_response` keyword argument when calling `AsyncQConnection` objects with `reuse=False` and `wait=False` to allow keeping the connection alive until an asynchronous response message has been received. + + ```python + >>> async with kx.AsyncQConnection(port=5050) as q: + >>> future = await q('system"sleep 5"; show"x"; neg[.z.w]"til 5"', wait=False, reuse=False, async_response=True) + >>> print(await future) + pykx.LongVector(pykx.q('0 1 2 3 4')) + ``` + +### Fixes and Improvements + +- Application of the `#!python str` function on empty PyKX objects could return unexpected results + + === "Behavior prior to change" + + ```python + >>> import pykx as kx + >>> str(kx.q('()!()')) + '' + >>> str(kx.q('()')) + '' + ``` + + === "Behavior post change" + + ```python + >>> import pykx as kx + >>> str(kx.q('()!()')) + '()!()' + >>> str(kx.q('()')) + '()' + ``` + +- Removal of previously deprecated use of keyword `labels` when using the `rename` method for table objects. Users should use the `mapper` keyword to maintain the same behavior. +- Removal of now unneeded warning indicating that use of `.pykx.q` calls are not supported when operating in a threading environment. +- Attempting to generate an `fby` clause using the syntax `kx.Column().fby()` now errors, instead pointing to the correct syntax `kx.Column.fby()`. +- When a client attempted to retrieve an object which could not be serialized from a PyKX server it resulted in the client process hanging. An appropriate error is now sent to the client. + + ```python + q)h:hopen 5000 + q)h".pykx.getattr" + 'Result of query with return type '' failed to serialize for IPC transport. + ``` + +- Addition of warning if the configuration value `QLIC` is set as a non directory path. 
+ + ```python + >>> import os + >>> os.environ['QLIC'] = 'invalid_path' + >>> import pykx as kx + UserWarning: Configuration value QLIC set to non directory value: invalid_path + ``` + +- Database generation functionality now allows users to pass any data-type which will convert to a `#!python pykx.Table` as the `#!python table` parameter, such as a `#!python pandas.DataFrame` or `#!python pyarrow.Table`. + + ```python + >>> import pykx as kx + >>> import pandas as pd + >>> import numpy as np + >>> data = pd.DataFrame({ + ... 'time': np.array([1,2,3], dtype='timedelta64[us]'), + ... 'sym': ['msft', 'ibm', 'ge'], + ... 'qty': [100, 200, 150]}) + >>> db = kx.DB(path='/tmp/db') + >>> db.create(data, 'tab', kx.DateAtom(2020, 1, 1)) + Writing Database Partition 2020.01.01 to table tab + ``` + +- Attempting to create a partitioned databases/add a partition to a database with a `sym_enum` keyword but no `by_field` would result in a `KeyError`. +- Improved output when a licence file cannot be found, full paths checked for a license file are now shown and default license installation process is offered to user. +- Users are now given the option to input a base64 encoded string when activating an existing license +- Using `math.inf` or `-math.inf` when creating numeric values now creates equivalent PyKX types + + ```python + >>> import pykx as kx + >>> import math + >>> kx.ShortAtom(math.inf) + pykx.ShortAtom(pykx.q('0Wh')) + >>> kx.FloatAtom(-math.inf) + pykx.FloatAtom(pykx.q('-0w')) + ``` + +- Attempting to execute a local file on a remote connection containing multiple empty newlines would result in an execution error. + + ```python + >>> import pykx as kx + >>> content = """.test.testFunc:{[x;y] + ... z: 1+1; + ... + ... k: 2+3; + ... + ... x+y + ... }; + ... + ... + ... .test.testFunc[1;100] + ... """ + >>> with open('test.q', 'w') as file: + ... 
file.write(content) + >>> conn = kx.SyncQConnection(port=5010) + >>> conn.file_execute('test.q', return_all=True) + ``` + +- Fixed a bug where `QFuture` objects returned by `AsyncQConnection` objects could block each other unnecessarily. + + ```python + async def async_query(port, qry): + async with await kx.AsyncQConnection(port=port) as q: + return await q(qry) + + async with asyncio.TaskGroup() as tg: + tg.create_task(async_query(5050, '{system"sleep 2"; til 10}[]')) + tg.create_task(async_query(5051, '{system"sleep 5"; til 20}[]')) + tg.create_task(async_query(5052, '{system"sleep 1"; til 5}[]')) + tg.create_task(async_query(5053, '{system"sleep 3"; til 7}[]')) + # Previously took 11 seconds now correctly returns the results in the order they complete and + # takes 5 seconds to run. + ``` + +!!! Note + + All QFuture objects returned from calls to `RawQConnection` objects must be awaited to recieve their results. Previously you could use just `conn.poll_recv()` and then directly get the result with `future.result()`. + +- Fixed error when attempting to convert `numpy.datetime64` variables to `kx.TimestampAtom` objects directly using the `kx.TimestampAtom` constructor method. 
+ + === "Behavior prior to change" + + ```python + >>> x = np.datetime64('now', 'ns') + >>> kx.TimestampAtom(x) + Traceback (most recent call last): + File "", line 1, in + File "/home/andymc/work/KXI-17767/KXI-17767/lib/python3.10/site-packages/pykx/wrappers.py", line 924, in __new__ + return toq(x, ktype=None if cls is K else cls, cast=cast) # TODO: 'strict' and 'cast' flags + File "pykx/toq.pyx", line 2672, in pykx.toq.ToqModule.__call__ + File "pykx/toq.pyx", line 1922, in pykx.toq.from_datetime_datetime + TypeError: unsupported operand type(s) for -: 'int' and 'datetime.datetime' + ``` + + === "Behavior post change" + + ```python + >>> x = np.datetime64('now', 'ns') + >>> kx.TimestampAtom(x) + pykx.TimestampAtom(pykx.q('2025.01.28D09:43:06.000000000')) + ``` + +- Fixed error when attempting to convert a `pandas.Categorical` to an existing `pykx.EnumVector` via `pykx.toq`. If the `pandas.Categorical` data contained a value outside of the `pykx.EnumVector` an error was thrown. + + === "Behavior prior to change" + + ```python + >>> cat = pd.Series(['aaa', 'bbb', 'ccc'], dtype='category', name='cat') + >>> kx.toq(cat) + pykx.EnumVector(pykx.q('`cat$`aaa`bbb`ccc')) + >>> cat = pd.Series(['aaa', 'bbb', 'ccc', 'ddd'], dtype='category', name='cat') + >>> kx.toq(cat) + Traceback (most recent call last): + File "", line 1, in + File "pykx/toq.pyx", line 2964, in pykx.toq.ToqModule.__call__ + File "pykx/toq.pyx", line 1715, in pykx.toq.from_pandas_series + File "pykx/toq.pyx", line 1555, in pykx.toq._to_numpy_or_categorical + File "pykx/toq.pyx", line 1828, in pykx.toq.from_pandas_categorical + File "/home/phagan/brokeCategories/brokeCategoryEnv/lib/python3.11/site-packages/pykx/embedded_q.py", line 246, in __call__ + return factory(result, False, name=query.__str__()) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "pykx/_wrappers.pyx", line 522, in pykx._wrappers._factory + File "pykx/_wrappers.pyx", line 515, in pykx._wrappers.factory + pykx.exceptions.QError: 
cast + ``` + + === "Behavior post change" + + ```python + >>> cat = pd.Series(['aaa', 'bbb', 'ccc'], dtype='category', name='cat') + >>> kx.toq(cat) + pykx.EnumVector(pykx.q('`cat$`aaa`bbb`ccc')) + >>> cat = pd.Series(['aaa', 'bbb', 'ccc', 'ddd'], dtype='category', name='cat') + >>> kx.toq(cat) + pykx.EnumVector(pykx.q('`cat$`aaa`bbb`ccc`ddd')) + ``` + +- IPC file execution now raises an error message if users attempt to use an unsupported file extension. Supported extensions: `.k`, `.q`, `.p`, `.py`. + + ```python + >>> conn = kx.SyncQConnection(port=5050) + >>> conn.file_execute('file.l') + QError: Provided file type 'l' unsupported + ``` + +- Error message when checking a license referenced a function `pykx.util.install_license` which is deprecated, this has now been updated to reference `pykx.license.install` + +### Beta Features + +- Added ability for users to convert between PyKX numeric vectors or N-Dimensional Lists and PyTorch Tensor objects using the `pt` method. + + ```python + >>> import os + >>> os.environ['PYKX_BETA_FEATURES'] = 'True' + >>> import pykx as kx + >>> kx.q.til(10).pt() + tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> kx.q('4 5#20?1f').pt() + tensor([[0.3928, 0.5171, 0.5160, 0.4067, 0.1781], + [0.3018, 0.7850, 0.5347, 0.7112, 0.4116], + [0.4932, 0.5785, 0.0839, 0.1960, 0.3756], + [0.6137, 0.5295, 0.6916, 0.2297, 0.6920]], dtype=torch.float64) + ``` + ## PyKX 3.0.1 #### Release Date @@ -16,6 +349,19 @@ ### Additions +- Addition of `.replace()` function to `kx.Vector` and `kx.List` objects to search for and replace items in each collection, retaining typing where appropriate. + + ```python + >>> l = kx.q('("a";3;1.3;`b)') + >>> l.replace(1.3, "junk") + pykx.List(pykx.q(' + "a" + 3 + `junk + `b + ')) + ``` + - Addition of the property `#!python day` to `#!python kx.Column` objects to allow users to retrieve the day of month of a timestamp. 
```python diff --git a/docs/release-notes/underq-changelog.md b/docs/release-notes/underq-changelog.md index e5f5d7a..a4fd998 100644 --- a/docs/release-notes/underq-changelog.md +++ b/docs/release-notes/underq-changelog.md @@ -6,6 +6,90 @@ This changelog provides updates from PyKX 2.0.0 and above, for information relat The changelog presented here outlines changes to PyKX when operating within a q environment specifically, if you require changelogs associated with PyKX operating within a Python environment see [here](./changelog.md). +## PyKX 3.1.0 + +#### Release Date + +2025-02-11 + +### Additions + +- Addition of `.pykx.typepy` which returns an objects datatype as a `CharVector` after being passed to python. + + ```q + q).pykx.typepy til 10 // Data type conversion set to default + "" + + q).pykx.util.defaultConv:"pd" // Set data type conversion to pandas + q).pykx.typepy til 10 + "" + ``` + +### Fixes and Improvements + +- Using `.pykx.toq`/`.pykx.toq0` now return the q representation of an object when passed a wrapped type conversion object + + === "Behaviour prior to change" + + ```q + q).pykx.toq .pykx.topd ([] a:1 2 3) + enlist[`..pandas;;][... 
+ ``` + + === "Behaviour post change" + + ```q + q).pykx.toq .pykx.topd[([] a:1 2 3)] + a + - + 1 + 2 + 3 + ``` + +- When using `.pykx.toq`/`.pykx.toq0`, passing compositions such as `any` now returns the data as the appropriate object + + === "Behaviour prior to change" + + ```q + q).pykx.toq any + 'Expected foreign object for call to .pykx.toq + ``` + + === "Behaviour post change" + + ```q + q).pykx.toq any + max$["b"] + ``` + +- When failing to find a file loaded with `.pykx.loadPy` the name of the file which was loaded is now included in the error message + + === "Behaviour prior to change" + + ```q + q).pykx.loadPy "file.py" + 'FileNotFoundError(2, 'No such file or directory') + ``` + + === "Behaviour post change" + + ```q + q).pykx.loadPy "file.py" + 'FileNotFoundError(file.py, 'No such file or directory') + ``` + +- When attempting to load PyKX after embedPy has already been loaded, an error will be thrown and PyKX will not continue to load. + +### Beta Features + +- Added ability for users to convert between PyKX numeric vectors or N-Dimensional Lists and PyTorch Tensor objects using the `.pykx.topt` function. 
+ + ```python + q).pykx.eval["lambda x:print(type(x))"].pykx.topt enlist til 10; + + ``` + ## PyKX 3.0.1 #### Release Date diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 3a6dccb..66025ba 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -23,94 +23,94 @@ "\n", "| **DataFrame properties**| **PyKX supported?** | **PyKX API documentation link** | \n", "|----------------------|-----------------|-----------------------------|\n", - "| [columns](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.columns.html) | :material-check: | [link](Pandas_API.html#tablecolumns) | \n", - "| [dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dtypes.html) | :material-check: | [link](Pandas_API.html#tabledtypes) |\n", - "| [empty](https://https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.empty.html) | :material-check: | [link](Pandas_API.html#tableempty) |\n", - "| [ndim](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.ndim.html) | :material-check: | [link](Pandas_API.html#tablendim) |\n", - "| [shape](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.shape.html) | :material-check: | [link](Pandas_API.html#tableshape) |\n", - "| [size](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.size.html) | :material-check: | [link](Pandas_API.html#tablesize) |\n", + "| [columns](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.columns.html) | Yes | [link](Pandas_API.html#tablecolumns) | \n", + "| [dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dtypes.html) | Yes | [link](Pandas_API.html#tabledtypes) |\n", + "| [empty](https://https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.empty.html) | Yes | [link](Pandas_API.html#tableempty) |\n", + "| [ndim](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.ndim.html) | Yes | 
[link](Pandas_API.html#tablendim) |\n", + "| [shape](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.shape.html) | Yes | [link](Pandas_API.html#tableshape) |\n", + "| [size](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.size.html) | Yes | [link](Pandas_API.html#tablesize) |\n", "\n", "### Analytic functionality\n", "\n", "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n", "|----------------------|-----------------|-----------------------------|\n", - "| [abs](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.abs.html) | :material-check: | [link](Pandas_API.html#tableabs) |\n", - "| [agg](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.agg.html) | :material-check: | [link](Pandas_API.html#tableagg) |\n", - "| [apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html) | :material-check: | [link](Pandas_API.html#tableapply) |\n", - "| [applymap](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.applymap.html) | :material-check: | [link](Pandas_API.html#tableapplymap) |\n", - "| [groupby](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) | :material-check: | [link](Pandas_API.html#tablegroupby) |\n", - "| [idxmax](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmax.html) | :material-check: | [link](Pandas_API.html#tableidxmax) |\n", - "| [idxmin](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmin.html) | :material-check: | [link](Pandas_API.html#tableidxmin) |\n", - "| [kurt](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.kurt.html) | :material-check: | [link](Pandas_API.html#tablekurt) |\n", - "| [max](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.max.html) | :material-check: | [link](Pandas_API.html#tablemax) |\n", - "| [map](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.map.html) | :material-check: | [link](Pandas_API.html#tablemap) 
|\n", - "| [mean](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mean.html) | :material-check: | [link](Pandas_API.html#tablemean) |\n", - "| [median](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.median.html) | :material-check: | [link](Pandas_API.html#tablemedian) |\n", - "| [min](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.min.html) | :material-check: | [link](Pandas_API.html#tablemin) |\n", - "| [mode](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mode.html) | :material-check: | [link](Pandas_API.html#tablemode) |\n", - "| [sem](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sem.html) | :material-check: | [link](Pandas_API.html#tablesem) |\n", - "| [sum](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sum.html) | :material-check: | [link](Pandas_API.html#tablesum) |\n", - "| [skew](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.skew.html) | :material-check: | [link](Pandas_API.html#tableskew) |\n", - "| [std](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.std.html) | :material-check: | [link](Pandas_API.html#tablestd) |\n", - "| [prod](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.prod.html) | :material-check: | [link](Pandas_API.html#tableprod) |\n", + "| [abs](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.abs.html) | Yes | [link](Pandas_API.html#tableabs) |\n", + "| [agg](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.agg.html) | Yes | [link](Pandas_API.html#tableagg) |\n", + "| [apply](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html) | Yes | [link](Pandas_API.html#tableapply) |\n", + "| [applymap](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.applymap.html) | Yes | [link](Pandas_API.html#tableapplymap) |\n", + "| [groupby](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html) | Yes | [link](Pandas_API.html#tablegroupby) |\n", + 
"| [idxmax](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmax.html) | Yes | [link](Pandas_API.html#tableidxmax) |\n", + "| [idxmin](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.idxmin.html) | Yes | [link](Pandas_API.html#tableidxmin) |\n", + "| [kurt](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.kurt.html) | Yes | [link](Pandas_API.html#tablekurt) |\n", + "| [max](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.max.html) | Yes | [link](Pandas_API.html#tablemax) |\n", + "| [map](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.map.html) | Yes | [link](Pandas_API.html#tablemap) |\n", + "| [mean](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mean.html) | Yes | [link](Pandas_API.html#tablemean) |\n", + "| [median](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.median.html) | Yes | [link](Pandas_API.html#tablemedian) |\n", + "| [min](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.min.html) | Yes | [link](Pandas_API.html#tablemin) |\n", + "| [mode](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.mode.html) | Yes | [link](Pandas_API.html#tablemode) |\n", + "| [sem](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sem.html) | Yes | [link](Pandas_API.html#tablesem) |\n", + "| [sum](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sum.html) | Yes | [link](Pandas_API.html#tablesum) |\n", + "| [skew](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.skew.html) | Yes | [link](Pandas_API.html#tableskew) |\n", + "| [std](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.std.html) | Yes | [link](Pandas_API.html#tablestd) |\n", + "| [prod](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.prod.html) | Yes | [link](Pandas_API.html#tableprod) |\n", "\n", "### Querying and data interrogation\n", "\n", "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n", 
"|----------------------|-----------------|-----------------------------|\n", - "| [all](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.all.html) | :material-check: | [link](Pandas_API.html#tableall) |\n", - "| [any](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.any.html) | :material-check: | [link](Pandas_API.html#tableany) |\n", - "| [at](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html) | :material-check: | [link](Pandas_API.html#tableat) |\n", - "| [count](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.count.html) | :material-check: | [link](Pandas_API.html#tablecount) |\n", - "| [get](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.get.html) | :material-check: | [link](Pandas_API.html#tableget) |\n", - "| [head](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html) | :material-check: | [link](Pandas_API.html#tablehead) |\n", - "| [iloc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html) | :material-check: | [link](Pandas_API.html#tableiloc) |\n", - "| [isna](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isna.html) | :material-check: | [link](Pandas_API.html#tableisna) |\n", - "| [isnull](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isnull.html) | :material-check: | [link](Pandas_API.html#tableisnull) |\n", - "| [loc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html) | :material-check: | [link](Pandas_API.html#tableloc) |\n", - "| [notna](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.notna.html) | :material-check: | [link](Pandas_API.html#tablenotna) |\n", - "| [notnull](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.notnull.html) | :material-check: | [link](Pandas_API.html#tablenotnull) |\n", - "| [sample](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html) | :material-check: | [link](Pandas_API.html#tablesample) |\n", - "| 
[select_dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.select_dtypes.html) | :material-check: | [link](Pandas_API.html#tableselect_dtypes) |\n", - "| [tail](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tail.html) | :material-check: | [link](Pandas_API.html#tabletail) |\n", + "| [all](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.all.html) | Yes | [link](Pandas_API.html#tableall) |\n", + "| [any](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.any.html) | Yes | [link](Pandas_API.html#tableany) |\n", + "| [at](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.at.html) | Yes | [link](Pandas_API.html#tableat) |\n", + "| [count](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.count.html) | Yes | [link](Pandas_API.html#tablecount) |\n", + "| [get](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.get.html) | Yes | [link](Pandas_API.html#tableget) |\n", + "| [head](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html) | Yes | [link](Pandas_API.html#tablehead) |\n", + "| [iloc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html) | Yes | [link](Pandas_API.html#tableiloc) |\n", + "| [isna](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isna.html) | Yes | [link](Pandas_API.html#tableisna) |\n", + "| [isnull](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.isnull.html) | Yes | [link](Pandas_API.html#tableisnull) |\n", + "| [loc](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html) | Yes | [link](Pandas_API.html#tableloc) |\n", + "| [notna](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.notna.html) | Yes | [link](Pandas_API.html#tablenotna) |\n", + "| [notnull](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.notnull.html) | Yes | [link](Pandas_API.html#tablenotnull) |\n", + "| 
[sample](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html) | Yes | [link](Pandas_API.html#tablesample) |\n", + "| [select_dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.select_dtypes.html) | Yes | [link](Pandas_API.html#tableselect_dtypes) |\n", + "| [tail](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.tail.html) | Yes | [link](Pandas_API.html#tabletail) |\n", "\n", "### Data preprocessing\n", "\n", "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n", "|----------------------|-----------------|-----------------------------|\n", - "| [add_prefix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_prefix.html) | :material-check: | [link](Pandas_API.html#tableas_prefix) |\n", - "| [add_suffix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_suffix.html) | :material-check: | [link](Pandas_API.html#tableas_suffix) |\n", - "| [astype](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.astype.html) | :material-check: | [link](Pandas_API.html#tableastype) |\n", - "| [drop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html) | :material-check: | [link](Pandas_API.html#tabledrop) |\n", - "| [drop_duplicates](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html) | :material-check: | [link](Pandas_API.html#tabledrop_duplicates) |\n", - "| [pop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pop.html) | :material-check: | [link](Pandas_API.html#tablepop) |\n", - "| [rename](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html) | :material-check: | [link](Pandas_API.html#tablerename) |\n", - "| [reset_index](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html) | :material-check: | [link](Pandas_API.html#tablereset_index) |\n", - "| [set_index](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html) | 
:material-check: | [link](Pandas_API.html#tableset_index) |\n", + "| [add_prefix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_prefix.html) | Yes | [link](Pandas_API.html#tableas_prefix) |\n", + "| [add_suffix](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.add_suffix.html) | Yes | [link](Pandas_API.html#tableas_suffix) |\n", + "| [astype](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.astype.html) | Yes | [link](Pandas_API.html#tableastype) |\n", + "| [drop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html) | Yes | [link](Pandas_API.html#tabledrop) |\n", + "| [drop_duplicates](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop_duplicates.html) | Yes | [link](Pandas_API.html#tabledrop_duplicates) |\n", + "| [pop](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pop.html) | Yes | [link](Pandas_API.html#tablepop) |\n", + "| [rename](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html) | Yes | [link](Pandas_API.html#tablerename) |\n", + "| [reset_index](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html) | Yes | [link](Pandas_API.html#tablereset_index) |\n", + "| [set_index](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html) | Yes | [link](Pandas_API.html#tableset_index) |\n", "\n", "### Data joins/merge\n", "\n", "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** |\n", "|----------------------|-----------------|-----------------------------|\n", - "| [merge](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) | :material-check: | [link](Pandas_API.html#tablemerge) |\n", - "| [merge_asof](https://pandas.pydata.org/docs/reference/api/pandas.merge_asof.html) | :material-check: | [link](Pandas_API.html#tablemerge_asof) |\n", + "| [merge](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) | Yes | 
[link](Pandas_API.html#tablemerge) |\n", + "| [merge_asof](https://pandas.pydata.org/docs/reference/api/pandas.merge_asof.html) | Yes | [link](Pandas_API.html#tablemerge_asof) |\n", "\n", "### Data sorting\n", "\n", "| **DataFrame method** | **PyKX supported?** | **PyKX API documentation link** | \n", "|----------------------|-----------------|-----------------------------|\n", - "| [sort_values](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html) | :material-check: | [link](Pandas_API.html#tablesort_values) |\n", - "| [nlargest](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.nlargest.html) | :material-check: | [link](Pandas_API.html#tablenlargest) |\n", - "| [nsmallest](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.nsmallest.html) | :material-check: | [link](Pandas_API.html#tablensmallest) |\n", + "| [sort_values](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sort_values.html) | Yes | [link](Pandas_API.html#tablesort_values) |\n", + "| [nlargest](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.nlargest.html) | Yes | [link](Pandas_API.html#tablenlargest) |\n", + "| [nsmallest](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.nsmallest.html) | Yes | [link](Pandas_API.html#tablensmallest) |\n", "\n", "### Unsupported functionality\n", "\n", "| **DataFrame methods** | **PyKX supported?** | **Additional information** |\n", "|----------------------|------------------|------------------------|\n", - "| `*from*` | :material-close: | Functionality for the creation of PyKX Tables from alternative data sources is not supported at this time. |\n", - "| `*plot*` | :material-close: | Functionality for the plotting of columns/tables is not supported at this time. |\n", - "| `*sparse*` | :material-close: | Sparse data like interactions presently not supported. 
|\n", - "| `to_*` | :material-close: | Functionality for the conversion/persistence of PyKX Tables to other formats is not supported at this time. |\n" + "| `*from*` | No | Functionality for the creation of PyKX Tables from alternative data sources is not supported at this time. |\n", + "| `*plot*` | No | Functionality for the plotting of columns/tables is not supported at this time. |\n", + "| `*sparse*` | No | Sparse data like interactions presently not supported. |\n", + "| `to_*` | No | Functionality for the conversion/persistence of PyKX Tables to other formats is not supported at this time. |\n" ] }, { @@ -4562,7 +4562,7 @@ "#### Table.rename()\n", "\n", "```\n", - "Table.rename(labels=None, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore', mapper=None)\n", + "Table.rename(mapper=None, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore')\n", "```\n", "\n", "Rename columns in a table and return the resulting Table object.\n", @@ -4571,7 +4571,7 @@ "\n", "| Name | Type | Description | Default |\n", "| :------: | :----: | :------------------------------------------------------------------------------------------------------------------| :---:|\n", - "| labels | dict | Deprecated. Please use `mapper` keyword. | None |\n", + "| mapper | dict | A dictionary of either new index or column names to new names to be used in conjunction with the _axis_ parameter. | None |\n", "| columns | dict | A dictionary of column name to new column name to use when renaming. | None |\n", "| index | dict | A dictionary of index to new index name to use when renaming single key column keyed tables. | None |\n", "| axis | {0 or 'index', 1 or 'columns'} | Designating the axis to be renamed by the _mapper_ dictionary. | None |\n", @@ -4579,7 +4579,6 @@ "| inplace | bool | Not yet implemented. | None |\n", "| level | None | Not yet implemented. | None |\n", "| errors | string | Not yet implemented. 
| None |\n", - "| mapper | dict | A dictionary of either new index or column names to new names to be used in conjunction with the _axis_ parameter. | None |\n", "\n", "**Returns:**\n", "\n", diff --git a/docs/user-guide/advanced/compress-encrypt.md b/docs/user-guide/advanced/compress-encrypt.md index 0d851d9..0d5a153 100644 --- a/docs/user-guide/advanced/compress-encrypt.md +++ b/docs/user-guide/advanced/compress-encrypt.md @@ -80,6 +80,50 @@ Once you are familiar with the options available to you, it's time to initialize We use this object in the remaining sections of the walkthrough, in a local (one-shot) and global context. +### Persist a splayed table with various configurations + +In cases where your are dealing with data security is important or where data is accessed at low frequency where latency considerations are not important persisting a table with encryption/compression enabled can be advantageous. + +1. Create a new `trade` table with gzip compression enabled. + + ```python + >>> import pykx as kx + >>> N = 10000000 + >>> trade = kx.Table(data={ + ... 'date': kx.random.random(N, kx.DateAtom('today') - [1, 2, 3, 4]), + ... 'time': kx.q.asc(kx.random.random(N, kx.q('1D'))), + ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), + ... 'price': kx.random.random(N, 10.0) + ... }) + >>> gzip = kx.Compress(algo=kx.CompressionAlgorithm.gzip, level=4) + >>> db = kx.DB(path='/tmp/splay') + >>> db.create(trade, 'trade', format='splayed', compress=gzip) + >>> kx.q('-21!`:/tmp/splay/trade/price') + pykx.Dictionary(pykx.q(' + compressedLength | 42727971 + uncompressedLength| 80000016 + algorithm | 2i + logicalBlockSize | 17i + zipLevel | 4i + ')) + ``` + +2. Create a new `quote` table with encryption enabled. + + ```python + >>> import pykx as kx + >>> N = 10000000 + >>> quote = kx.Table(data={ + ... 'time': kx.q.asc(kx.random.random(N, kx.q('1D'))), + ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), + ... 'ask': kx.random.random(N, 100), + ... 
'bid': kx.random.random(N, 100) + ... }) + >>> encrypt = kx.Encrypt(path='/path/to/my.key', password='PassWorD') + >>> db = kx.DB(path='/tmp/splay') + >>> db.create(quote, 'quote', format='splayed', encrypt=encrypt) + ``` + ### Persist database partitions with various configurations Not all data is created equally, in time-series applications such as algorithmic trading it is often the case that older data is less valuable than newer data. As a result, when backfilling historical data, you may more aggressively compress older datasets. The PyKX compression logic allows you to persist different partitions within a historical database to different levels. diff --git a/docs/user-guide/advanced/database/db_gen.md b/docs/user-guide/advanced/database/db_gen.md index cf44417..5444801 100644 --- a/docs/user-guide/advanced/database/db_gen.md +++ b/docs/user-guide/advanced/database/db_gen.md @@ -13,7 +13,7 @@ _This page explains how to create and expand databases using PyKX._ !!! tip "Tip: For the best experience, we recommend reading [Databases in PyKX](index.md) first. If you already have access to a database and only need to load it, you can skip this page and jump right to [load database](db_loading.md)." -Before leveraging the performance of PyKX when querying on-disk data, you need to create a [persisted database](..//..//..//extras/glossary.md#persisted-database). In the following sections we complete the following: +Before leveraging the performance of PyKX when querying on-disk data, you need to create a [persisted database](../../../extras/glossary.md#persisted-database). In the following sections we complete the following: 1. [Create a new database](#1-create-database) containing a single table `#!python trade` and multiple days of data. 1. [Add a new day worth of data](#2-add-new-database-partition) for `#!python today` to the database for the `#!python trade` table. 
@@ -26,7 +26,7 @@ Before leveraging the performance of PyKX when querying on-disk data, you need t ## 1. Create database -For more information on database structures, see the linked section on [what is a database](index.md#whats-a-pykx-database). With PyKX, use the `#!python pykx.DB` class for all database interactions in Python. This class lets you create, expand, and maintain on-disk partitioned databases. First, we need to create a database. +For more information on database structures, see the linked section on [what is a database](index.md#whats-a-pykx-database). With PyKX, use the `#!python pykx.DB` class for all database interactions in Python. This class lets you create, expand, and maintain on-disk splayed/partitioned databases. First, we need to create a database. In the next cell, we create a `#!python trade` table with data from multiple days in the chat. @@ -43,25 +43,47 @@ In the next cell, we create a `#!python trade` table with data from multiple day Now that we have generated our trade table, we can persist it to disk at the location `#!python /tmp/db`. -```python ->>> db = kx.DB(path='/tmp/db') ->>> db.create(trade, 'trade', 'date') -``` +=== "Partitioned Database" + + ```python + >>> db = kx.DB(path='/tmp/db') + >>> db.create(trade, 'trade', 'date') + ``` + +=== "Splayed Database" + + ```python + >>> db = kx.DB(path='/tmp/splay') + >>> db.create(trade, 'trade', format='splayed') + ``` That's it, you now have a persisted database. To verify the availability of the database and its tables, we can examine the database object: -```python ->>> db.tables -['trade'] ->>> type(db.trade) - -``` +=== "Partitioned Database" + + ```python + >>> db.tables + ['trade'] + >>> type(db.trade) + + ``` + +=== "Splayed Database" + + ```python + >>> db.tables + ['trade'] + >>> type(db.trade) + + ``` -The above database persistence uses the default parameters within the `#!python create` function. 
If you need to compress/encrypt the persisted database partitions or need to define a `#!python by` or specify the symbol enumeration name, you can follow the API documentation [here](../../../api/db.md#pykx.db.DB.create). +The above database persistence uses the default parameters within the `#!python create` function. If you need to compress/encrypt the tables persisted to the database or need to define a `#!python by` or specify the symbol enumeration name when persisting a Partitioned Database, you can follow the API documentation [here](../../../api/db.md#pykx.db.DB.create). ## 2. Add new database partition -Now that you have generated a database, you can add extra partitions using the same database class and the `#!python create` function. In this example we will add new data for the current day created in the below cell: +The following section outlines functionality only applicable to Partitioned Databases. + +Now that you have generated a database, you can add extra partitions using the same database class and the `#!python create` function. In this example we will add new data for the current day created in the below cell and convert it to a Pandas DataFrame prior to persistence: ```python >>> N = 2000000 @@ -69,7 +91,9 @@ Now that you have generated a database, you can add extra partitions using the s ... 'time': kx.q.asc(kx.random.random(N, kx.q('1D'))), ... 'sym': kx.random.random(N, ['AAPL', 'GOOG', 'MSFT']), ... 'price': kx.random.random(N, 10.0) -... }) +... }).pd() +>>> type(trade) +pandas.core.frame.DataFrame ``` Note that in comparison to the original database creation logic, we do not have a `#!python date` column. Instead, we add a date at partition creation. Below we provide a variety of examples of adding new partitions under various conditions: @@ -100,7 +124,7 @@ Note that in comparison to the original database creation logic, we do not have ## 3. 
Add new table to database -After onboarding your first table to a database, a common question is “How can I add a new table of related data?”. You can use the `#!python database` class and the `#!python create` function to do this. For instance, let’s add a `#!python quote` table for the current day: +After onboarding your first table to a database, a common question is “How can I add a new table of related data?”. You can use the `#!python DB` class and the `#!python create` function to do this. For instance, let’s add a `#!python quote` table for the current day: ```python >>> N = 1000000 @@ -112,18 +136,36 @@ After onboarding your first table to a database, a common question is “How can ... }) ``` -We can now add this as the data for the current day to the `#!python quote` table and see that the table is defined: +We can now add this data to your database -```python ->>> db.create(quote, 'quote', kx.DateAtom('today')) ->>> db.tables -['quote', 'trade'] ->>> type(db.quote) - -``` +=== "Partitioned Database" + + For the current day we can add the `#!python quote` table and see that the table is defined: + + ```python + >>> db.create(quote, 'quote', kx.DateAtom('today')) + >>> db.tables + ['quote', 'trade'] + >>> type(db.quote) + + ``` + +=== "Splayed Database" + + Add the table `#!python quote` to the database + + ```python + >>> db.create(quote, 'quote', format='splayed') + >>> db.tables + ['trade', 'quote'] + >>> type(db.quote) + + ``` ## 4. Ensure new table is queryable +The following section outlines a restriction only applicable to Partitioned Databases, Splayed Databases should be queryable immediately. 
+ You have now persisted another table to your database, however, you will notice if you access the `#!python quote` table that the return is surprising: ```python diff --git a/docs/user-guide/advanced/database/db_mgmt.md b/docs/user-guide/advanced/database/db_mgmt.md index fbf3446..3df2f14 100644 --- a/docs/user-guide/advanced/database/db_mgmt.md +++ b/docs/user-guide/advanced/database/db_mgmt.md @@ -60,10 +60,10 @@ In the below cell, we complete the following: >>> db.apply_function('trade', 'price_copy', lambda x: x * 0.5) >>> db.delete_column('trade', 'price') >>> db.rename_column('trade', 'price_copy', 'price') ->>> db.reorder_columns(col_order) +>>> db.reorder_columns('trade', col_order) ``` ## Next Steps - [Query your database with Python](../../fundamentals/query/pyquery.md) -- [Compress/encrypt data](../compress-encrypt.md#persisting-database-partitions-with-various-configurations) for persisting database partitions. +- [Compress/encrypt data](../compress-encrypt.md) when persisting databases. diff --git a/docs/user-guide/advanced/database/index.md b/docs/user-guide/advanced/database/index.md index 4bb6d38..e7d474d 100644 --- a/docs/user-guide/advanced/database/index.md +++ b/docs/user-guide/advanced/database/index.md @@ -12,7 +12,31 @@ _This page explains the concept of databases in PyKX, including the creation and ## What's a PyKX database? -In PyKX, the term database refers to [partitioned kdb+ databases](https://code.kx.com/q/kb/partition/). A partitioned kdb+ database consists of one or more tables saved on-disk, where they are split into separate folders called partitions. These partitions are most often based on a temporal field within the dataset, such as date or month. Each table within the database must follow the same partition structure. +In PyKX, the term database refers to a kdb+ database which can hold a set of [splayed](https://code.kx.com/q/kb/splayed-tables/) and [partitioned](https://code.kx.com/q/kb/partition/) tables. 
+ +### Splayed Database + +A splayed kdb+ database consists of a single table stored on-disk with each column stored as a separate file rather than using a single file for the whole table. Tables of medium-size with < 100 million rows and many columns are good candidates for being stored as splayed tables, in particular when only a small subset of columns are being accessed often. + +```bash +quotes + ├── .d + ├── price + ├── sym + └── time +``` + +!!! note "More information on splayed databases" + + The splayed database format used by PyKX has been used in production environments for decades. As such there is a significant amount of information available on the creation and use of these databases. Below are some articles. + + - [q knowledge base splayed databases](https://code.kx.com/q/kb/splayed-tables/) + - [Q for Mortals splayed tables](https://code.kx.com/q4m3/14_Introduction_to_Kdb%2B/#142-splayed-tables) + - [Basics of splayed tables](https://thinqkdb.wordpress.com/splayed-tables/) + +### Partitioned Database + +A partitioned kdb+ database consists of one or more tables saved on-disk, where they are split into separate folders called partitions. These partitions are most often based on a temporal field within the dataset, such as date or month. Each table within the database must follow the same partition structure. A visual representation of a database containing 2 tables (trade and quote) partitioned by date would be as follows, where `#!python price`, `#!python sym`, `#!python time` in the quotes folder are columns within the table: diff --git a/docs/user-guide/fundamentals/conversion_considerations.md b/docs/user-guide/fundamentals/conversion_considerations.md index 5870c17..20213ee 100644 --- a/docs/user-guide/fundamentals/conversion_considerations.md +++ b/docs/user-guide/fundamentals/conversion_considerations.md @@ -32,4 +32,65 @@ Most q datatypes have the concepts of null, negative infinity, and infinity. 
Pyt ## Temporal data types -Converting [temporal data types](./temporal.md) in PyKX involves handling [timestamp/datetime](./temporal.md#timestampdatetime-types) types and [duration](./temporal.md#duration-types) types, each with specific considerations due to differences in how Python and q (the language used by kdb+) represent these data types. \ No newline at end of file +Converting [temporal data types](./temporal.md) in PyKX involves handling [timestamp/datetime](./temporal.md#timestampdatetime-types) types and [duration](./temporal.md#duration-types) types, each with specific considerations due to differences in how Python and q (the language used by kdb+) represent these data types. + +## List conversion considerations + +By default the library converts generic PyKX List objects `#!python pykx.List` to NumPy as an array of NumPy arrays. This conversion is chosen as it allows for the most flexible representation of data allowing ragged array representations and mixed lists of objects to be converted easily. However, this representation can be difficult to work with if/when dealing with multi-dimensional numeric data as is common in machine learning tasks for example. + +As an example we can look at the conversion of a 3-Dimensional regularly shaped `#!python pykx.List` object to a NumPy array as follows: + +```python +>>> import pykx as kx +>>> qlist = kx.random.random([2, 2, 2], 5.0) +pykx.List(pykx.q(' +3.453383 3.388243 0.8355005 4.325851 +0.6168138 3.450051 3.849182 2.360245 +')) +>>> qlist.np() +array([array([array([3.45338272, 3.3882429 ]), array([0.83550046, 4.32585143])], + dtype=object) , + array([array([0.61681376, 3.45005117]), array([3.84918233, 2.36024517])], + dtype=object) ], + dtype=object) +``` + +This representation clearly is more difficult to handle than you might expect for a regularly shaped numeric dataset of single type. 
A keyword argument `#!python reshape` is provided to facilitate a better converted representation of these singularly typed N-Dimensional lists, for example: + +```python +>>> import pykx as kx +>>> qlist = kx.random.random([2, 2, 2], 5.0) +>>> qlist.np(reshape=True) +array([[[3.45338272, 3.3882429 ], + [0.83550046, 4.32585143]], + [[0.61681376, 3.45005117], + [3.84918233, 2.36024517]]]) +``` + +Setting the `#!python reshape` keyword to `#!python True` checks if the input list is "rectangular" and contains only one data type before converting it to a single NumPy array by ['razing'](https://code.kx.com/q/ref/raze/) the data to a single array and reshaping the data in NumPy post conversion. + +This can be slow for nested arrays or many list elements. If you know the input and output shape of the data, you can pass this shape to the `#!python reshape` keyword like this: + +```python +>>> import pykx as kx +>>> qlist = kx.random.random([10000, 100, 10], 10.0) +>>> qlist.np(reshape=[10000, 100, 10]) +array([[[4.99088645, 9.20164969, 3.3486574 , ..., 9.28529354, + 7.78650336, 0.9355585 ], + [9.49664481, 0.79703755, 8.41364461, ..., 5.28080439, + 7.3933825 , 7.40476901], + [6.03204263, 9.40702084, 6.75116092, ..., 2.43375089, + 9.33645056, 8.56930709], + ... +``` + +The performance boost from knowing the shape ahead of time is significant + +```python +import pykx as kx +qlist = kx.random.random([10000, 100, 10], 10.0) +%timeit qlist.np(reshape=True) +# 974 ms ± 34.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) +%timeit qlist.np(reshape=[10000, 100, 10]) +# 81.2 ms ± 2.69 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each) +``` diff --git a/docs/user-guide/fundamentals/creating.md b/docs/user-guide/fundamentals/creating.md index bd16a94..1339fcb 100644 --- a/docs/user-guide/fundamentals/creating.md +++ b/docs/user-guide/fundamentals/creating.md @@ -26,7 +26,7 @@ There are five ways to create PyKX objects: ### 1.a Convert Python objects to PyKX objects -The simplest way to create a PyKX object is by converting a similar Python type into a PyKX object. You can do this with the `#!python pykx.toq function`, which supports conversions from Python, NumPy, pandas, and PyArrow types to PyKX objects. Open the tabs that interest you to see conversion examples: +The simplest way to create a PyKX object is by converting a similar Python type into a PyKX object. You can do this with the `#!python pykx.toq function`, which supports conversions from Python, NumPy, Pandas, PyArrow, and PyTorch (Beta) types to PyKX objects. Open the tabs that interest you to see conversion examples: ??? Note "Specify target types" @@ -170,6 +170,29 @@ The simplest way to create a PyKX object is by converting a similar Python type ')) ``` +=== "PyTorch (Beta)" + + When converting data from PyTorch types to PyKX support is only provided for `#!python torch.Tensor` object conversions to PyKX at this time and requires setting of the configuration `PYKX_BETA_FEATURES=True` as shown below + + ```python + >>> import os + >>> os.environ['PYKX_BETA_FEATURES'] = 'True' + >>> import pykx as kx + >>> import torch + >>> pt = torch.Tensor([1, 2, 3]) + tensor([1., 2., 3.]) + >>> ptl = torch.Tensor([[1, 2, 3], [4, 5, 6]]) + tensor([[1., 2., 3.], + [4., 5., 6.]]) + >>> kx.toq(pt) + pykx.RealVector(pykx.q('1 2 3e')) + >>> kx.toq(ptl) + pykx.List(pykx.q(' + 1 2 3 + 4 5 6 + ')) + ``` + By default, when you convert Python strings to PyKX, they are returned as `#!python pykx.SymbolAtom` objects. This ensures a clear distinction between `#!python str` (string) and `#!python byte` objects. 
However, you might prefer Python strings to be returned as `#!python pykx.CharVector` objects, to achieve memory efficiency or greater flexibility in analytic development. To do this, use the keyword argument `#!python strings_as_char`, which ensures that all `#!python str` objects are converted to `#!python pykx.CharVector` objects. ```python @@ -319,20 +342,23 @@ a 0.02810674 0.481821 Converting data to a PyKX format allows for easy interaction with these objects using q or the analytic functionality provided by PyKX. However, this format may not be suitable for all use cases. For instance, if a function requires a Pandas DataFrame as input, a PyKX object must be converted to a Pandas DataFrame. -Once the data is ready for use in Python, it may be more appropriate to convert it into a representation using Python, NumPy, Pandas, or PyArrow by using the following methods: +Once the data is ready for use in Python, it may be more appropriate to convert it into a representation using Python, NumPy, Pandas, PyArrow, or PyTorch (Beta) by using the following methods: -| **Method** | **Description** | -|----------|----------------------------------| -| `*.py()` | Convert a PyKX object to Python | -| `*.np()` | Convert a PyKX object to Numpy | -| `*.pd()` | Convert a PyKX object to Pandas | -| `*.pa()` | Convert a PyKX object to PyArrow | +| **Method** | **Description** | +|-----------------|----------------------------------| +| `*.py()` | Convert a PyKX object to Python | +| `*.np()` | Convert a PyKX object to Numpy | +| `*.pd()` | Convert a PyKX object to Pandas | +| `*.pa()` | Convert a PyKX object to PyArrow | +| `*.pt()` (Beta) | Convert a PyKX object to PyTorch | ??? 
example "Example" ```python - import pykx as kx - qarr = kx.q('til 5') + >>> import os + >>> os.environ['PYKX_BETA_FEATURES'] = 'True' + >>> import pykx as kx + >>> qarr = kx.q('til 5') >>> qarr.py() [0, 1, 2, 3, 4] >>> qarr.np() @@ -353,6 +379,8 @@ Once the data is ready for use in Python, it may be more appropriate to convert 3, 4 ] + >>> qarr.pt() + tensor([0, 1, 2, 3, 4]) >>> >>> qtab = kx.Table(data={ ... 'x': kx.random.random(5, 1.0), diff --git a/docs/user-guide/fundamentals/query/pyquery.md b/docs/user-guide/fundamentals/query/pyquery.md index e129603..fa91a62 100644 --- a/docs/user-guide/fundamentals/query/pyquery.md +++ b/docs/user-guide/fundamentals/query/pyquery.md @@ -199,6 +199,17 @@ The `columns` keyword provides the ability to access columnar data by name or ap AAPL 2022.01.02 140.0383 54 280.0766 .. ')) + >>> trades.update(columns=kx.Column('price', name='dpx') * 2) + pykx.Table(pykx.q(' + sym date price size dpx + -------------------------------------- + AAPL 2022.01.01 145.6259 19 291.2518 + MSFT 2022.01.02 533.9187 92 1067.837 + MSFT 2022.01.02 17.17696 7 34.35393 + GOOG 2022.01.03 916.1286 60 1832.257 + AAPL 2022.01.02 140.0383 54 280.0766 + .. + ')) ``` - Multiple columns can be modified, retrieved or aggregations applied by using queries can be returned and have aggregations/operation performed on them. @@ -263,7 +274,7 @@ The `columns` keyword provides the ability to access columnar data by name or ap ')) ``` -- Columns can be named by using the `name` method on you column objects +- Columns can be named/renamed by using the `name` method or keyword. 
=== "select" @@ -274,6 +285,12 @@ The `columns` keyword provides the ability to access columnar data by name or ap -------- 989.3873 ')) + >>> trades.select(columns=kx.Column('price', name='maxPrice').max()) + pykx.Table(pykx.q(' + maxPrice + -------- + 989.3873 + ')) ``` === "exec" @@ -285,6 +302,12 @@ The `columns` keyword provides the ability to access columnar data by name or ap multiPrice| 291.2518 1067.837 34.35.. symName | AAPL MSFT MSFT .. ')) + >>> trades.exec(columns=(2 * kx.Column('price', name='multiPrice') & + ... kx.Column('sym', name='symName')) + pykx.Dictionary(pykx.q(' + multiPrice| 291.2518 1067.837 34.35.. + symName | AAPL MSFT MSFT .. + ')) ``` === "update" @@ -303,18 +326,44 @@ The `columns` keyword provides the ability to access columnar data by name or ap AAPL 2022.01.02 140.0383 54 140.0383 .. ')) + >>> trades.update(columns=kx.Column('price', name='priceCol')) + pykx.Table(pykx.q(' + sym date price size priceCol + -------------------------------------- + AAPL 2022.01.01 145.6259 19 145.6259 + MSFT 2022.01.02 533.9187 92 533.9187 + MSFT 2022.01.02 17.17696 7 17.17696 + GOOG 2022.01.03 916.1286 60 916.1286 + AAPL 2022.01.02 140.0383 54 140.0383 + .. + ')) ``` -Finally as an alternative approach for renaming a dictionary can be used to control names of returned columns. +- As an alternative approach for renaming, a dictionary can be used to control names of returned columns. -```python ->>> trades.select(columns={'maxPrice':kx.Column('price').max()}) -pykx.Table(pykx.q(' -maxPrice --------- -993.6284 -')) -``` + ```python + >>> trades.select(columns={'maxPrice':kx.Column('price').max()}) + pykx.Table(pykx.q(' + maxPrice + -------- + 993.6284 + ')) + ``` + +- You can also use the `value` keyword to pass data when generating a new column. 
For example + + ```python + >>> trades.update(kx.Column('newCol', value = kx.random.random(100, 10.0))).head(5) + pykx.Table(pykx.q(' + sym date price size newCol + --------------------------------------- + AAPL 2022.01.01 145.6259 19 3.489322 + MSFT 2022.01.02 533.9187 92 4.731594 + MSFT 2022.01.02 17.17696 7 8.769994 + GOOG 2022.01.03 916.1286 60 0.3928818 + AAPL 2022.01.02 140.0383 54 4.937273 + ')) + ``` #### where diff --git a/docs/user-guide/fundamentals/query/qquery.md b/docs/user-guide/fundamentals/query/qquery.md index 1860405..ca8cfe0 100644 --- a/docs/user-guide/fundamentals/query/qquery.md +++ b/docs/user-guide/fundamentals/query/qquery.md @@ -77,7 +77,7 @@ GOOG 2022.01.02 423.6121 ## Next Steps -Now that you have learnt how to query your data using the Pythonic API you may be interested in other methods for querying your data: +Now that you have learnt how to query your data using q you may be interested in other methods for querying your data: - If you want to query your data in a more Python-first way follow the guide [here](./pyquery.md). - If you wish to query your data using SQL, you can follow the introduction to this functionality [here](./sql.md). 
diff --git a/docs/user-guide/fundamentals/query/sql.md b/docs/user-guide/fundamentals/query/sql.md index 9557816..020da72 100644 --- a/docs/user-guide/fundamentals/query/sql.md +++ b/docs/user-guide/fundamentals/query/sql.md @@ -119,10 +119,11 @@ GOOG 2022.01.02 423.6121 ## Next Steps -Now that you have learnt how to query your data using the Pythonic API you may be interested in other methods for querying your data: +Now that you have learnt the fundamentals of how to query your data using the SQL API you may be interested in: -- To optimize frequently called SQL queries the [prepare](../../../api/query.md#pykx.query.SQL.prepare) and [execute](../../../api/query.md#pykx.query.SQL.execute) can be used to separate SQL parsing from query execution as detailed [here](https://code.kx.com/insights/1.10/core/sql.html#prepare-and-execute). +- To optimize frequently called SQL queries the [prepare](../../../api/query.md#pykx.query.SQL.prepare) and [execute](../../../api/query.md#pykx.query.SQL.execute) methods can be used to separate SQL parsing from query execution as detailed [here](https://code.kx.com/insights/1.10/core/sql.html#prepare-and-execute). - If you want to query your data in a more Python-first way follow the guide [here](./pyquery.md). +- If you want to query your data in q follow the guide [here](./qquery.md). - To learn how to make your queries more performant following the tips and tricks [here](./perf.md). For some further reading, here are some related topics: diff --git a/mkdocs.yml b/mkdocs.yml index 27633bd..e2e95bd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -3,7 +3,7 @@ site_name: 'PyKX' site_author: 'KX' site_description: 'PyKX Documentation' site_url: 'https://code.kx.com/pykx' -copyright: '© 2024 Kx Systems, Inc. KX and kdb+ are registered trademarks of Kx Systems, Inc., a subsidiary of FD Technologies plc.' +copyright: '© 2025 Kx Systems, Inc. KX and kdb+ are registered trademarks of Kx Systems, Inc., a subsidiary of FD Technologies plc.' 
site_dir: 'public' dev_addr: 'localhost:8080' @@ -284,7 +284,9 @@ nav: - PyKX under q: release-notes/underq-changelog.md - 2.x -> 3.x Upgrade : upgrades/2030.md - Roadmap: roadmap.md - - Beta features: beta-features/index.md + - Beta features: + - Introduction: beta-features/index.md + - PyTorch Conversions: beta-features/torch.md - Help and Support: - Troubleshooting: help/troubleshooting.md - FAQ: help/faq.md diff --git a/pyproject.toml b/pyproject.toml index 1a5cb6d..7c6e957 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,12 +48,13 @@ classifiers = [ "Typing :: Typed", ] dependencies = [ - "numpy~=1.20, <2.0; python_version=='3.7'", - "numpy~=1.22, <2.0; python_version=='3.8'", - "numpy~=1.22, <2.0; python_version=='3.9'", - "numpy~=1.22, <2.0; python_version=='3.10'", - "numpy~=1.23, <2.0; python_version=='3.11'", - "numpy~=1.26, <2.0; python_version=='3.12'", + "numpy~=1.20; python_version=='3.7'", + "numpy~=1.22; python_version=='3.8'", + "numpy~=1.22; python_version=='3.9'", + "numpy>=1.22; python_version=='3.10'", + "numpy>=1.23; python_version=='3.11'", + "numpy>=1.26; python_version=='3.12'", + "numpy>=1.26; python_version=='3.13'", "pandas>=1.2, < 2.0; python_version=='3.8'", "pandas>=1.2, <= 2.2.3; python_version>'3.8'", "pytz>=2022.1", @@ -99,7 +100,7 @@ lint = [ "pyproject-flake8==0.0.1a2", ] pyarrow = [ - "pyarrow>=3.0.0", + "pyarrow>=3.0.0, <19.0.0", ] streaming = [ "psutil>=5.0.0" @@ -110,6 +111,9 @@ dashboards = [ streamlit = [ "streamlit~=1.28; python_version>'3.7'" ] +torch = [ + "torch>2.1" +] test = [ "coverage[toml]==6.3.2", "Cython~=3.0.0", @@ -143,11 +147,12 @@ changelog = "https://code.kx.com/pykx/changelog.html" requires = [ "Cython~=3.0.0", "numpy~=1.20.0; python_version=='3.7'", # Use numpy version 1.20.x for building the python 3.7 wheel - "numpy~=1.22, <1.23; python_version=='3.8'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 - "numpy~=1.22, <1.23; 
python_version=='3.9'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 - "numpy~=1.22, <1.23; python_version=='3.10'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 - "numpy~=1.23.2, <1.24; python_version=='3.11'", - "numpy~=1.26.0; python_version=='3.12'", + "numpy~=1.22.0; python_version=='3.8'", # Use the highest patch version of numpy 1.22.x, this will still support a user using numpy version 1.22.0 + "numpy~=2.0; python_version=='3.9'", + "numpy~=2.0; python_version=='3.10'", + "numpy~=2.0; python_version=='3.11'", + "numpy~=2.0; python_version=='3.12'", + "numpy~=2.0; python_version=='3.13'", "setuptools~=68.0.0; python_version=='3.7'", "setuptools~=69.0.2; python_version!='3.7'", "setuptools-scm[toml]~=7.1.0; python_version=='3.7'", diff --git a/setup.py b/setup.py index c84e084..242068c 100755 --- a/setup.py +++ b/setup.py @@ -262,13 +262,43 @@ def ext(name: str, numpy=False, cython=False, libraries=['dl', *windows_libraries])) - with cd(src_dir/'lib'): + + with cd(src_dir/'q.so'/'qk'): [ - shutil.copy(f, f'4-1-libs/{f}') + shutil.copy(f, f'../../lib/4-1-libs/{f}') for f in - [str(f) for f in os.listdir() if os.path.isfile(f) and str(f) != 'q.k'] + [str(f) for f in os.listdir() if os.path.isfile(f) and (str(f) != 'q.k') and ('pykx_init' not in str(f))] # noqa: E501 ] + + [ + shutil.copy(f, f'../../lib/{f}') + for f in + [str(f) for f in os.listdir() if os.path.isfile(f) and (str(f) != 'q.k') and ('pykx_init' not in str(f))] # noqa: E501 + ] + + [ + shutil.copy('pykx_init.q_', '../../' + p + 'pykx_init.q_') + for p in + ['', 'lib/', 'lib/4-1-libs/'] + ] + for p in ('l64', 'l64arm', 'm64', 'm64arm', 'w64'): + with cd(src_dir/'q.so'/'libs'/'4-1'/p): + [ + shutil.copy(f, f'../../../../lib/4-1-libs/{p}/{f}') + for f in + [str(f) for f in os.listdir() + if str(f) != 'symbols.txt' and not os.path.exists(f'../../../../4-1-libs/{p}/{f}')] + ] + + 
with cd(src_dir/'q.so'/'libs'/'4-0'/p): + [ + shutil.copy(f, f'../../../../lib/{p}/{f}') + for f in + [str(f) for f in os.listdir() + if str(f) != 'symbols.txt' and not os.path.exists(f'../../../../../{p}/{f}')] + ] + with cd(src_dir/'lib'/p): [ shutil.copy(f, f'../4-1-libs/{p}/{f}') @@ -276,6 +306,18 @@ def ext(name: str, [str(f) for f in os.listdir() if str(f) != 'symbols.txt' and not os.path.exists(f'../4-1-libs/{p}/{f}')] ] + + with cd(src_dir/'lib'): + [ + shutil.copy(f, f'4-1-libs/{f}') + for f in + [str(f) for f in os.listdir() if os.path.isfile(f) and (str(f) != 'q.k')] + ] + + with cd(src_dir): + shutil.copy('pykx.q', 'lib') + shutil.copy('pykx.q', 'lib/4-1-libs') + setup( name=pyproject['name'], description=pyproject['description'], diff --git a/src/pykx/_wrappers.pyx b/src/pykx/_wrappers.pyx index a16853b..95bc93f 100644 --- a/src/pykx/_wrappers.pyx +++ b/src/pykx/_wrappers.pyx @@ -107,9 +107,8 @@ def k_str(self): return repr(self) cdef core.K x = core.k(0, '.Q.s', core.r1(_k(self)), NULL) if x.n == 0: - s = '' - else: - s = np.asarray(x.G0).tobytes().decode() + x = core.k(0, '.Q.s1', core.r1(_k(self)), NULL) + s = np.asarray(x.G0).tobytes().decode() core.r0(x) if len(s) and s.endswith(os.linesep): # Use `len(s) - X` instead of `-X` because wraparound is disabled here @@ -536,6 +535,8 @@ def _pyfactory(addr: int, incref: bool, typenum: int, raw: bool = False): return k_object.pa(raw=raw) elif typenum == 5: return k_object + elif typenum == 6: + return k_object.pt() k_dir = dir(k_object) if 'np' in k_dir: # nocov return k_object.np(raw=raw) # nocov @@ -545,4 +546,6 @@ def _pyfactory(addr: int, incref: bool, typenum: int, raw: bool = False): return k_object.pd(raw=raw) # nocov elif 'pa' in k_dir: # nocov return k_object.pa(raw=raw) # nocov + elif 'pt' in k_dir: #nocov + return k_object.pt(raw=raw) # nocov diff --git a/src/pykx/config.py b/src/pykx/config.py index ebdd6cf..1cdd6e7 100644 --- a/src/pykx/config.py +++ b/src/pykx/config.py @@ -108,6 +108,10 @@ 
def _get_qhome(): # License search _qlic = _get_config_value('QLIC', '') +if _qlic != '': + if not os.path.isdir(_qlic): + warn(f'Configuration value QLIC set to non directory value: {_qlic}') + _pwd = os.getcwd() license_located = False lic_path = '' @@ -153,6 +157,18 @@ def _check_qargs(): qargs = _check_qargs() +def _license_install_path(root, lic_type, qlic): + license = input('\nProvide the download location of your license ' + f'(for example, {root}{lic_type}) : ').strip() + download_location = os.path.expanduser(Path(license)) + + if not os.path.exists(download_location): + raise Exception(f'Download location provided {download_location} does not exist.') + + shutil.copy(download_location, qlic) + print(f'\nPyKX license successfully installed to: {qlic / lic_type}\n') + + def _license_install_B64(license, license_type): # pragma: no cover try: lic = base64.b64decode(license) @@ -253,6 +269,8 @@ def _license_install(intro=None, return_value=False, license_check=False, licens elif continue_license in ('y', 'Y', ''): existing_license = input('\nDo you have access to an existing license for PyKX ' 'that you would like to use? [N/y]: ') + if existing_license not in ('Y', 'y', 'N', 'n', ''): + raise Exception('Invalid input provided please try again') if existing_license in ('N', 'n', ''): commercial = input('\nIs the intended use of this software for:' '\n [1] Personal use (Default)' @@ -296,16 +314,9 @@ def _license_install(intro=None, return_value=False, license_check=False, licens raise Exception('User provided option was not one of [1/2/3]') if install_type in ('1', ''): - license = input('\nProvide the download location of your license ' - f'(for example, {root}{lic_type}) : ').strip() - download_location = os.path.expanduser(Path(license)) - if not os.path.exists(download_location): - err_msg = f'Download location provided {download_location} does not exist.' 
- raise Exception(err_msg) + _license_install_path(root, lic_type, qlic) - shutil.copy(download_location, qlic) - print(f'\nPyKX license successfully installed to: {qlic / lic_type}\n') elif install_type == '2': license = input('\nProvide your activation key (base64 encoded string) ' @@ -319,15 +330,35 @@ def _license_install(intro=None, return_value=False, license_check=False, licens if return_value: return False else: - license = input('\nProvide the location of your license ' - f'(for example, {root}) : ').strip() - download_location = os.path.expanduser(Path(license)) + commercial = input('\nPlease confirm the license type:\n' + ' [1] Personal use (kc.lic)\n' + ' [2] Commercial use (k4.lic)\n' + 'Enter your choice here [1/2]: ') + if commercial not in ('1', '2', ''): + raise Exception('User provided option was not one of [1/2]') + + personal = commercial in ('1', '') + lic_type = 'kc.lic' if personal else 'k4.lic' - if not os.path.exists(download_location): - raise Exception(f'Download location provided {download_location} does not exist.') + install_type = input( + '\nPlease select the method you wish to use to activate your license:\n' + ' [1] Provide the location of your license\n' + ' [2] Input the activation key\n' + 'Enter your choice here [1/2]: ') + + if install_type not in ('1', '2', ''): + raise Exception('User provided option was not one of [1/2]') + if install_type in ('1', ''): + + _license_install_path(root, lic_type, qlic) + + else: + + license = input('\nProvide your activation key (base64 encoded string) : ').strip() + + _license_install_B64(license, lic_type) - shutil.copy(download_location, qlic) - print('\nPyKX license successfully installed to: {qlic / lic_type}\n') # noqa: E501 + print(f'\nPyKX license successfully installed to: {qlic / lic_type}\n') # noqa: E501 else: raise Exception('Invalid input provided please try again') if return_value: diff --git a/src/pykx/core.pyx b/src/pykx/core.pyx index 4bb1299..5a8130e 100644 --- 
a/src/pykx/core.pyx +++ b/src/pykx/core.pyx @@ -7,6 +7,7 @@ from typing import List, Tuple import re import sys + from . import beta_features from .util import add_to_config, num_available_cores from .config import tcore_path_location, _is_enabled, _license_install, pykx_threading, _get_config_value, pykx_lib_dir, ignore_qhome, lic_path @@ -92,7 +93,7 @@ import subprocess import sys from .config import find_core_lib, k_gc, qargs, qhome, qlic, pykx_lib_dir, \ - release_gil, _set_licensed, under_q, use_q_lock + release_gil, _set_licensed, under_q, use_q_lock, _qlic from .exceptions import PyKXException, PyKXWarning final_qhome = str(qhome if ignore_qhome else pykx_lib_dir) @@ -295,10 +296,13 @@ if not pykx_threading: if _qinit_unsuccessful: # Fallback to unlicensed mode if _qinit_output != ' ': _capout_msg = f'Captured output from initialization attempt:\n{_qinit_output}' + _paths_checked = f' QLIC ({_qlic if _qlic else "Not Set"})\n'\ + f' QHOME ({qhome})' _lic_location = f'License location used:\n{lic_path}' else: _capout_msg = '' # nocov - this can only occur under extremely weird circumstances. _lic_location = '' # nocov - this additional line is to ensure this code path is covered. + _paths_checked = '' # nocov - this additional line is to ensure this code path is covered. if hasattr(sys, 'ps1'): if re.compile('exp').search(_capout_msg): _exp_license = 'Your PyKX license has now expired.\n\n'\ @@ -309,7 +313,7 @@ if not pykx_threading: elif re.compile('embedq').search(_capout_msg): _ce_license = 'You appear to be using a non kdb Insights license.\n\n'\ f'{_capout_msg}\n\n'\ - f'{_lic_location}\n\n'\ + f'{_lic_location}\n\n'\ 'Running PyKX in the absence of a kdb Insights license '\ 'has reduced functionality.\nWould you like to install '\ 'a kdb Insights personal license? 
[Y/n]: ' @@ -326,8 +330,14 @@ if not pykx_threading: if '--licensed' in qargs or _is_enabled('PYKX_LICENSED', '--licensed'): raise PyKXException(f'Failed to initialize embedded q.{_capout_msg}\n\n{_lic_location}') else: - warn('Failed to initialize PyKX successfully with ' - f'the following error: {_capout_msg}\n\n{_lic_location}', PyKXWarning) + warn('Failed to initialize PyKX successfully with ' f'the following error: {_capout_msg}\n', PyKXWarning) + if _paths_checked: + _missing_license = f'PyKX was unable to locate your license file in:\n{_paths_checked}\n\n'\ + 'Running PyKX in unlicensed mode has reduced functionality.\n\n'\ + 'Would you like to install a license? (Selecting no will proceed with unlicensed mode) [Y/n]: ' + _license_install(_missing_license, True) + else: + _paths_checked _libq_path_py = bytes(find_core_lib('e')) _libq_path = _libq_path_py _q_handle = dlopen(_libq_path, RTLD_NOW | RTLD_GLOBAL) @@ -362,7 +372,6 @@ if not pykx_threading: raise PyKXException( # nocov f'Non-zero qinit return code {qinit_return_code} despite successful pre-check') # nocov else: - beta_features.append('PyKX Threading') _libq_path_py = bytes(str(find_core_lib('q')), 'utf-8') _tcore_path = tcore_path_location _libq_path = _libq_path_py diff --git a/src/pykx/db.py b/src/pykx/db.py index 40d6322..9523711 100644 --- a/src/pykx/db.py +++ b/src/pykx/db.py @@ -34,6 +34,16 @@ def _check_loading(cls, table, err_msg): raise QError(err_msg + " not possible as specified table not available") +def _get_type(cls, table): + type_str = str(type(getattr(cls, table))) + if 'SplayedTable' in type_str: + return 'splayed' + elif 'PartitionedTable' in type_str: + return 'partitioned' + else: + raise QError(f'Unsupported type {type_str} passed to _get_type') + + def _check_column(cls, table, column): table_cols = cls.list_columns(table) if column not in table_cols: @@ -64,7 +74,7 @@ def _check_column(cls, table, column): _func_mapping = { 'dpt': '{[d;p;f;t;s] .Q.dpt[d;p;t]}', 'dpft': 
'{[d;p;f;t;s] .Q.dpft[d;p;f;t]}', - 'dpfs': '{[d;p;f;t;s] .Q.dpfs[d;p;f;s]}', + 'dpts': '{[d;p;f;t;s] .Q.dpts[d;p;t;s]}', 'dpfts': '{[d;p;f;t;s] .Q.dpfts[d;p;f;t;s]}' } @@ -159,8 +169,9 @@ def __init__(self, def create(self, table: k.Table, table_name: str, - partition: Union[int, str, k.DateAtom], + partition: Union[int, str, k.DateAtom] = None, *, # noqa: C901 + format: Optional[str] = 'partitioned', by_field: Optional[str] = None, sym_enum: Optional[str] = None, log: Optional[bool] = True, @@ -181,6 +192,7 @@ def create(self, partition: The name of the column which is to be used to partition the data if supplied as a `#!python str` or if supplied as non string object this is used as the partition to which all data is persisted. + format: Is the table that's being created a 'splayed' or 'partitioned' table by_field: A field of the table to be used as a by column, this column will be the second column in the table (the first being the virtual column determined by the partitioning column) @@ -279,8 +291,15 @@ def create(self, """ save_dir = self.path func_name = 'dpfts' + table = k.toq(table) if type(table) != k.Table: - raise QError('Supplied table must be of type pykx.Table') + raise QError('Supplied table must be of type "pykx.Table" or can be converted to this type') # noqa: E501 + if format not in ['splayed', 'partitioned']: + raise QError("'format' must be one of 'splayed'/'partitioned', supplied value: " + f'{format}') + if (format == 'partitioned') & (partition == None): # noqa: E711 + raise QError("Creation of partitioned format table requires a supplied " + "'partition' parameter, currently set as default None") if by_field is None: func_name = func_name.replace('f', '') if sym_enum is None: @@ -299,24 +318,28 @@ def create(self, compress.global_init(encrypt=encrypt) qfunc = q(_func_mapping[func_name]) try: - if type(partition) == str: - if partition not in table.columns: - raise QError(f'Partition column {partition} not in supplied table') - if 
type(table[partition]).t not in [5, 6, 7, 13, 14]: - raise QError(f'Unsupported type: {type(table[partition])} ' - 'not supported for table partitioning') - parts = q.distinct(table[partition]) - for i in parts: - if log: - print(f'Writing Database Partition {i} to table {table_name}') - q[table_name] = q('{?[x;enlist y;0b;()]}', table, [q('='), partition, i]) - q[table_name] = q('{![x;();0b;enlist y]}', q[table_name], partition) - qfunc(save_dir, i, by_field, table_name, sym_enum) + if format == 'splayed': + table = q.Q.en(save_dir, table) + q('{.Q.dd[x;`] set y}', save_dir/table_name, table) else: - q[table_name] = table - if log: - print(f'Writing Database Partition {partition} to table {table_name}') - qfunc(save_dir, partition, by_field, table_name, sym_enum) + if type(partition) == str: + if partition not in table.columns: + raise QError(f'Partition column {partition} not in supplied table') + if type(table[partition]).t not in [5, 6, 7, 13, 14]: + raise QError(f'Unsupported type: {type(table[partition])} ' + 'not supported for table partitioning') + parts = q.distinct(table[partition]) + for i in parts: + if log: + print(f'Writing Database Partition {i} to table {table_name}') + q[table_name] = q('{?[x;enlist y;0b;()]}', table, [q('='), partition, i]) + q[table_name] = q('{![x;();0b;enlist y]}', q[table_name], partition) + qfunc(save_dir, i, by_field, table_name, sym_enum) + else: + q[table_name] = table + if log: + print(f'Writing Database Partition {partition} to table {table_name}') + qfunc(save_dir, partition, by_field, table_name, sym_enum) except QError as err: q('{![`.;();0b;enlist x]}', table_name) q.z.zd = compression_cache @@ -469,7 +492,7 @@ def load(self, ''', db_path, db_name) self.path = load_path self.loaded = True - self.tables = q.Q.pt.py() + self.tables = q('{x where {-1h=type .Q.qp get x}each x}', q.tables()).py() for i in self.tables: if i in self._dir_cache: warn(f'A database table "{i}" would overwrite one of the pykx.DB() methods, 
please access your table via the table attribute') # noqa: E501 @@ -521,7 +544,11 @@ def rename_column(self, """ _check_loading(self, table, 'Column rename') _check_column(self, table, original_name) - q.dbmaint.renamecol(self.path, table, original_name, new_name) + table_type = _get_type(self, table) + if table_type == 'splayed': + q.dbmaint.rename1col(self.path / table, original_name, new_name) + else: + q.dbmaint.renamecol(self.path, table, original_name, new_name) self._reload() return None @@ -556,7 +583,11 @@ def delete_column(self, table: str, column: str) -> None: """ _check_loading(self, table, 'Column deletion') _check_column(self, table, column) - q.dbmaint.deletecol(self.path, table, column) + table_type = _get_type(self, table) + if 'splayed' == table_type: + q.dbmaint.delete1col(self.path / table, column) + else: + q.dbmaint.deletecol(self.path, table, column) self._reload() return None @@ -589,7 +620,11 @@ def rename_table(self, original_name: str, new_name: str) -> None: ``` """ _check_loading(self, original_name, 'Table rename') - q.dbmaint.rentable(self.path, original_name, new_name) + table_type = _get_type(self, original_name) + if 'splayed' == table_type: + q.dbmaint.ren1table(self.path / original_name, self.path / new_name) + else: + q.dbmaint.rentable(self.path, original_name, new_name) # Remove the original table, without this it persists as an accessible table q('{![`.;();0b;enlist x]`}', original_name) self._reload() @@ -620,6 +655,9 @@ def list_columns(self, table: str) -> None: ``` """ _check_loading(self, table, 'Column listing') + table_type = _get_type(self, table) + if table_type == 'splayed': + return q('get', self.path / table / '.d').py() return q.dbmaint.listcols(self.path, table).py() def add_column(self, @@ -658,7 +696,11 @@ def add_column(self, ``` """ _check_loading(self, table, 'Column addition') - q.dbmaint.addcol(self.path, table, column_name, default_value) + table_type = _get_type(self, table) + if table_type == 
'splayed': + q.dbmaint.add1col(self.path / table, column_name, default_value) + else: + q.dbmaint.addcol(self.path, table, column_name, default_value) self._reload() return(None) @@ -687,7 +729,7 @@ def find_column(self, table: str, column_name: str) -> None: ['testTable'] >>> db.list_columns('testTable') ['month', 'sym', 'time', 'price', 'size'] - >>> db.find_column('price') + >>> db.find_column('testTable', 'price') 2023.11.10 16:48:57 column price (type 0) in `:/usr/pykx/db/2015.01.01/testTable 2023.11.10 16:48:57 column price (type 0) in `:/usr/pykx/db/2015.01.02/testTable ``` @@ -702,7 +744,7 @@ def find_column(self, table: str, column_name: str) -> None: ['testTable'] >>> db.list_columns('testTable') ['month', 'sym', 'time', 'price', 'size'] - >>> db.find_column('side') + >>> db.find_column('testTable', 'side') 2023.11.10 16:49:02 column side *NOT*FOUND* in `:/usr/pykx/db/2015.01.01/testTable 2023.11.10 16:49:02 column side *NOT*FOUND* in `:/usr/pykx/db/2015.01.02/testTable Traceback (most recent call last): @@ -711,6 +753,9 @@ def find_column(self, table: str, column_name: str) -> None: ``` """ _check_loading(self, table, 'Finding columns') + table_type = _get_type(self, table) + if 'splayed' == table_type: + return q.dbmaint.find1col(self.path / table, column_name).py() return q.dbmaint.findcol(self.path, table, column_name).py() def reorder_columns(self, table: str, new_order: list) -> None: @@ -747,7 +792,12 @@ def reorder_columns(self, table: str, new_order: list) -> None: ``` """ _check_loading(self, table, 'Column reordering') - q.dbmaint.reordercols(self.path, table, new_order) + table_type = _get_type(self, table) + if 'splayed' == table_type: + q.dbmaint.reordercols0(self.path / table, new_order) + else: + q.dbmaint.reordercols(self.path, table, new_order) + self._reload() return None def set_column_attribute(self, table: str, column_name: str, new_attribute: str) -> None: @@ -797,6 +847,7 @@ def set_column_attribute(self, table: str, column_name: 
str, new_attribute: str) """ _check_loading(self, table, 'Attribute setting') _check_column(self, table, column_name) + table_type = _get_type(self, table) if new_attribute not in ['s', 'g', 'p', 'u', 'sorted', 'grouped', 'partitioned', 'unique']: raise QError("new_attribute must be one of " @@ -806,7 +857,11 @@ def set_column_attribute(self, table: str, column_name: str, new_attribute: str) 'grouped': 'g', 'partitioned': 'p', 'unique': 'u'}[new_attribute] - q.dbmaint.setattrcol(self.path, table, column_name, new_attribute) + if 'splayed' == table_type: + q.dbmaint.fn1col(self.path / table, column_name, q('{x#y}', new_attribute)) + else: + q.dbmaint.setattrcol(self.path, table, column_name, new_attribute) + self._reload() return None def set_column_type(self, table: str, column_name: str, new_type: k.K) -> None: @@ -857,8 +912,12 @@ def set_column_type(self, table: str, column_name: str, new_type: k.K) -> None: if new_type not in _ktype_to_conversion: raise QError("Unable to find user specified conversion type: " + str(new_type)) col_type = _ktype_to_conversion[new_type] + table_type = _get_type(self, table) try: - q.dbmaint.castcol(self.path, table, column_name, col_type) + if table_type == 'splayed': + q.dbmaint.fn1col(self.path / table, column_name, q('{x$y}', col_type)) + else: + q.dbmaint.castcol(self.path, table, column_name, col_type) except QError as err: if str(err) == 'type': raise QError("Unable to convert specified column '" + column_name + "' to type: " + str(new_type)) # noqa: E501 @@ -910,6 +969,9 @@ def clear_column_attribute(self, table: str, column_name: str) -> None: """ _check_loading(self, table, 'Attribute clearing') _check_column(self, table, column_name) + table_type = _get_type(self, table) + if 'splayed' == table_type: + q.dbmaint.fn1col(self.path / table, column_name, q('`#')) q.dbmaint.clearattrcol(self.path, table, column_name) return None @@ -942,7 +1004,11 @@ def copy_column(self, table: str, original_column: str, new_column: str) -> 
None """ _check_loading(self, table, 'Column copying') _check_column(self, table, original_column) - q.dbmaint.copycol(self.path, table, original_column, new_column) + table_type = _get_type(self, table) + if 'splayed' == table_type: + q.dbmaint.copy1col(self.path / table, original_column, new_column) + else: + q.dbmaint.copycol(self.path, table, original_column, new_column) self._reload() return None @@ -1027,7 +1093,11 @@ def apply_function(self, table: str, column_name: str, function: callable) -> No _check_column(self, table, column_name) if not callable(function): raise RuntimeError("Provided 'function' is not callable") - q.dbmaint.fncol(self.path, table, column_name, function) + table_type = _get_type(self, table) + if 'splayed' == table_type: + q.dbmaint.fn1col(self.path / table, column_name, function) + else: + q.dbmaint.fncol(self.path, table, column_name, function) self._reload() return None @@ -1120,14 +1190,21 @@ def partition_count(self, *, subview: Optional[list] = None) -> k.Dictionary: ``` """ qtables = self.tables + cache = None + try: + cache = q.Q.pv + except QError: + pass if subview==None: # noqa: E711 q.Q.view() else: q.Q.view(subview) for i in qtables: - q.Q.cn(getattr(self.table, i)) + tab = getattr(self.table, i) + if isinstance(tab, k.PartitionedTable): + q.Q.cn(tab) res = q('.Q.pv!flip .Q.pn') - q.Q.view() + q.Q.view(cache) return res def subview(self, view: list = None) -> None: diff --git a/src/pykx/embedded_q.py b/src/pykx/embedded_q.py index 68399ce..5ffcedb 100644 --- a/src/pykx/embedded_q.py +++ b/src/pykx/embedded_q.py @@ -12,7 +12,7 @@ from . import toq from . import wrappers from . 
import schema -from .config import find_core_lib, licensed, no_qce, pykx_dir, pykx_qdebug, pykx_threading, qargs, skip_under_q, suppress_warnings, pykx_debug_insights # noqa +from .config import find_core_lib, licensed, no_qce, pykx_dir, pykx_libs_dir, pykx_qdebug, pykx_threading, qargs, skip_under_q, suppress_warnings, pykx_debug_insights # noqa from .core import keval as _keval from .exceptions import FutureCancelled, LicenseException, NoResults, PyKXException, PyKXWarning, QError # noqa from ._wrappers import _factory as factory @@ -118,7 +118,7 @@ class EmbeddedQ(Q, metaclass=ABCMetaSingleton): """Interface for using q within the current python process. Call this to execute q code.""" def __init__(self): # noqa if licensed: - kxic_path = (pykx_dir/'lib').as_posix() + kxic_path = pykx_libs_dir.as_posix() kxic_file = 'kxic.k' pykx_qlib_path = (pykx_dir/'pykx').as_posix() # This q code is run as a single call into q to improve startup performance: @@ -153,8 +153,6 @@ def __init__(self): # noqa else: code += f'2:[`$"{pykx_qlib_path}q";(`k_pykx_init; 2)][`$"{find_core_lib("q").as_posix()}";{"1b" if pykx_threading else "0b"}];' # noqa: E501 code += f'`.pykx.modpow set {{((`$"{pykx_qlib_path}q") 2: (`k_modpow; 3))["j"$x;"j"$y;$[z~(::);(::);"j"$z]]}};' # noqa: E501 - if pykx_threading and (not suppress_warnings): - warn('pykx.q is not supported when using PYKX_THREADING.') code += '@[get;`.pykx.i.kxic.loadfailed;{()!()}]' kxic_loadfailed = self._call(code, skip_debug=True).py() if (not platform.system() == "Linux") and (not no_qce) and ('--no-sql' not in qargs): diff --git a/src/pykx/extensions/hdb.q b/src/pykx/extensions/hdb.q index 7461159..aa76923 100644 --- a/src/pykx/extensions/hdb.q +++ b/src/pykx/extensions/hdb.q @@ -1,4 +1,6 @@ -.tick.init:{[config] +\d .tick + +init:{[config] if[99h<>type config; '"Supplied configuration must be a dictionary" ]; @@ -7,3 +9,10 @@ ]; @[{system"l ",x;-1"Successfully loaded database: ",x;};string config`database;{-1"Database not 
loaded"}]; } + +tabs:() + +set_tables:{[tabname;schema] + tabs,:enlist[tabname]; + tabname set schema + } diff --git a/src/pykx/extensions/rdb.q b/src/pykx/extensions/rdb.q index 0c8cb5e..fc94bb9 100644 --- a/src/pykx/extensions/rdb.q +++ b/src/pykx/extensions/rdb.q @@ -40,3 +40,10 @@ init:{[config] .u.x:rdb_config`tickerplant`hdb; .u.rep[rdb_config`database] . hopen[`$":",.u.x 0]("{(.u.sub[;`]each x;`.u `i`L)}";.tick.subscriptions) } + +tabs:() + +set_tables:{[tabname;schema] + tabs,:enlist[tabname]; + tabname set schema + } diff --git a/src/pykx/ipc.py b/src/pykx/ipc.py index da3cc15..84a07c8 100644 --- a/src/pykx/ipc.py +++ b/src/pykx/ipc.py @@ -7,12 +7,15 @@ import asyncio from contextlib import nullcontext from multiprocessing import Lock as multiprocessing_lock, RawValue +from pathlib import Path import selectors import socket from threading import Lock as threading_lock from time import monotonic_ns, sleep from typing import Any, Callable, Optional, Union +import warnings from weakref import finalize, WeakMethod +import warnings import sys from . import deserialize, serialize, Q @@ -62,7 +65,7 @@ class MessageType(Enum): class QFuture(asyncio.Future): """ A Future object to be returned by calls to q from an instance of - [pykx.AsyncQConnection][pykx.AsyncQConnection]. + [pykx.AsyncQConnection][pykx.AsyncQConnection] or [pykx.RawQConnection][pykx.RawQConnection]. This object can be awaited to receive the resulting value. @@ -101,9 +104,16 @@ def __await__(self) -> Any: FutureCancelled: This QFuture instance has been cancelled and cannot be awaited. BaseException: If the future has an exception set it will be raised upon awaiting it. 
""" + async def closure(): + await self.q_connection._recv2(acceptAsync=True, fut=self) + await asyncio.sleep(0) + return self + if self.done(): return self.result() while not self.done(): + if self.done(): + return self.result() if self.poll_recv is not None: try: res = self.q_connection.poll_recv() @@ -113,7 +123,7 @@ def __await__(self) -> Any: self.set_exception(QError(str(e))) else: try: - self.q_connection._recv(acceptAsync=True) + return closure().__await__() except BaseException as e: if isinstance(e, QError): raise e @@ -160,20 +170,22 @@ def __await__(self) -> Any: async def __async_await__(self) -> Any: if self.done(): return self.result() + while not self.done(): await asyncio.sleep(0) if self.done(): return self.result() if self.poll_recv is not None: try: - res = self.q_connection.poll_recv() + res = await self.q_connection.poll_recv2(fut=self) if res is not None: self.set_result(res) + return res except BaseException as e: self.set_exception(QError(str(e))) else: try: - self.q_connection._recv(acceptAsync=True) + await self.q_connection._recv2(acceptAsync=True, fut=self) except BaseException as e: if isinstance(e, QError): raise e @@ -226,7 +238,7 @@ def _await(self) -> Any: except BaseException as e: if isinstance(e, QError): raise e - if self._connection_info['reconnection_attempts'] != -1: + if self.q_connection._connection_info['reconnection_attempts'] != -1: # TODO: Clear call stack futures print('WARNING: Connection lost attempting to reconnect.', file=sys.stderr) loops = self._connection_info['reconnection_attempts'] @@ -522,9 +534,9 @@ def _create_connection_to_server(self): ) self._sock.setblocking(0) object.__setattr__(self, '_reader', selectors.DefaultSelector()) - self._reader.register(self._sock, selectors.EVENT_READ, WeakMethod(self._recv_socket)) + self._reader.register(self._sock, selectors.EVENT_READ, (WeakMethod(self._recv_socket), WeakMethod(self._recv_socket2))) object.__setattr__(self, '_writer', selectors.DefaultSelector()) - 
self._writer.register(self._sock, selectors.EVENT_WRITE, WeakMethod(self._send_sock)) + self._writer.register(self._sock, selectors.EVENT_WRITE, (WeakMethod(self._send_sock), WeakMethod(self._send_sock))) def _init(self, host: Union[str, bytes] = 'localhost', @@ -614,9 +626,9 @@ def _init(self, if not isinstance(self, SecureQConnection): self._sock.setblocking(0) object.__setattr__(self, '_reader', selectors.DefaultSelector()) - self._reader.register(self._sock, selectors.EVENT_READ, WeakMethod(self._recv_socket)) + self._reader.register(self._sock, selectors.EVENT_READ, (WeakMethod(self._recv_socket), WeakMethod(self._recv_socket2))) object.__setattr__(self, '_writer', selectors.DefaultSelector()) - self._writer.register(self._sock, selectors.EVENT_WRITE, WeakMethod(self._send_sock)) + self._writer.register(self._sock, selectors.EVENT_WRITE, (WeakMethod(self._send_sock), WeakMethod(self._send_sock))) object.__setattr__(self, '_timeouts', 0) object.__setattr__(self, '_initialized', True) super().__init__() @@ -685,7 +697,7 @@ def _send(self, for key, _mask in events: callback = key.data if debugging: - return callback()( + return callback[0]()( key.fileobj, bytes(CharVector( '{[pykxquery] .Q.trp[{[x] (0b; value x)}; pykxquery;' @@ -697,7 +709,7 @@ def _send(self, debug=debug ) else: - return callback()(key.fileobj, query, *params, wait=wait, error=error, debug=debug) + return callback[0]()(key.fileobj, query, *params, wait=wait, error=error, debug=debug) def _ipc_query_builder(self, query, *params): data = bytes(query, 'utf-8') if isinstance(query, str) else query @@ -767,13 +779,56 @@ def _send_sock(self, return q_future # flake8: noqa: C901 - def _recv(self, locked=False, acceptAsync=False): + async def _recv2(self, locked=False, acceptAsync=False, fut: Optional[QFuture]=None): timeout = self._connection_info['timeout'] while self._timeouts > 0: events = self._reader.select(timeout) for key, _ in events: key.data()(key.fileobj) self._timeouts -= 1 + if 
isinstance(self, RawQConnection): + if len(self._send_stack) != 0: + self.poll_send(0) + start_time = monotonic_ns() + with self._lock if self._lock is not None and not locked else nullcontext(): + while True: + if fut is not None and fut.done(): + return fut.result() + if timeout != 0.0 and monotonic_ns() - start_time >= (timeout * 1000000000): + self._timeouts += 1 + raise QError('Query timed out') + events = self._reader.select(timeout) + for key, _ in events: + callback = key.data + msg_type, res = callback[0]()(key.fileobj) + if MessageType.sync_msg.value == msg_type: + print("WARN: Discarding unexpected sync message from handle: " + + str(self.fileno()), file=sys.stderr) + try: + self._send(SymbolAtom("PyKX cannot receive queries in client mode"), + error=True) + except BaseException: + pass + elif MessageType.async_msg.value == msg_type and not acceptAsync: + print("WARN: Discarding unexpected async message from handle: " + + str(self.fileno()), file=sys.stderr) + elif MessageType.resp_msg.value == msg_type or \ + MessageType.async_msg.value == msg_type: + return res + else: + raise RuntimeError('MessageType unknown') + return + await asyncio.sleep(0.0) + + + # flake8: noqa: C901 + def _recv(self, locked=False, acceptAsync=False): + timeout = self._connection_info['timeout'] + while self._timeouts > 0: + events = self._reader.select(timeout) + for key, _ in events: + key.data[0]()(key.fileobj) + self._timeouts -= 1 if isinstance(self, RawQConnection): if len(self._send_stack) == len(self._call_stack) and len(self._send_stack) != 0: self.poll_send() @@ -786,7 +841,7 @@ def _recv(self, locked=False, acceptAsync=False): events = self._reader.select(timeout) for key, _ in events: callback = key.data - msg_type, res = callback()(key.fileobj) + msg_type, res = callback[0]()(key.fileobj) if MessageType.sync_msg.value == msg_type: print("WARN: Discarding unexpected sync message from handle: " + str(self.fileno()), file=sys.stderr) @@ -804,6 +859,67 @@ def 
_recv(self, locked=False, acceptAsync=False): else: raise RuntimeError('MessageType unknown') + async def _recv_socket2(self, sock): + tot_bytes = 0 + chunks = [] + # message header + a = await self._loop.sock_recv(sock, 8) + chunks = list(a) + tot_bytes += 8 + if len(chunks) == 0: + try: + if self._connection_info['reconnection_attempts'] == -1: + self.close() + except BaseException: + self.close() + raise RuntimeError("Attempted to use a closed IPC connection") + elif len(chunks) <8: + try: + if self._connection_info['reconnection_attempts'] == -1: + self.close() + except BaseException: + self.close() + raise RuntimeError("PyKX attempted to process a message containing less than " + "the expected number of bytes, connection closed." + f"\nReturned bytes: {chunks}.\n" + "If you have a reproducible use-case please raise an " + "issue at https://github.com/kxsystems/pykx/issues with " + "the use-case provided.") + + # The last 5 bytes of the header contain the size and the first byte contains information + # about whether the message is encoded in big-endian or little-endian form + endianness = chunks[0] + if endianness == 1: # little-endian + size = chunks[3] + for i in range(7, 3, -1): + size = size << 8 + size += chunks[i] + else: # nocov + # big-endian + size = chunks[3] + for i in range(4, 8): + size = size << 8 + size += chunks[i] + + buff = bytearray(size) + chunks = bytearray(chunks) + for i in range(8): + buff[i] = chunks[i] + view = memoryview(buff)[8:] + # message body + while tot_bytes < size: + try: + to_read = min(self._socket_buffer_size, size - tot_bytes) + read, _ = await self._loop.sock_recvfrom_into(sock, view, to_read) + view = view[read:] + tot_bytes += read + except BlockingIOError: # nocov + # The only way to get here is if we start processing a message before all the data + # has been received by the socket + pass + res = chunks[1], self._create_result(buff) + return res + def _recv_socket(self, sock): tot_bytes = 0 chunks = [] @@ -961,17 
+1077,33 @@ def file_execute( conn.file_execute('/User/path/to/file.q') ``` """ + wlist = ['k', 'q', 'p', 'py'] with open(pykx_lib_dir/'q.k', 'r') as f: lines = f.readlines() for line in lines: if 'pykxld:' in line: - ld = line[7:] + ld = line[7:].encode() + if isinstance(file_path, str): + path_stem = Path(file_path).suffix[1:] + if not path_stem in wlist: + raise QError(f"Provided file type '{path_stem}' unsupported") with open(file_path) as f: lines = f.readlines() - return self("{[fn;code;file] value (@';last file;enlist[file],/:value[\"k)\",string fn]string code)}", # noqa : E501 + lines = [CharVector(i.rstrip('\n')) for i in lines] + return self(""" + {[fn;code;file;stem] + $[any stem~/:("p";"py"); + $[`pykx in key `; + .pykx.pyexec "\n" sv code; + '"PyKX must be loaded on remote server"]; + value (@';last file;enlist[file],/:value[\"k)\",fn]code) + ] + } + """, ld, lines, bytes(file_path, 'utf-8'), + bytes(path_stem, 'utf-8'), wait=return_all) def fileno(self) -> int: @@ -1324,8 +1456,7 @@ def __init__(self, port: The port to which a connection is to be established. username: Username for q connection authorization. password: Password for q connection authorization. - timeout: Timeout for blocking socket operations in seconds. If set to 0, the socket - will be non-blocking. + timeout: Timeout is not supported when using `AsyncQConnection` objects. large_messages: Whether support for messages >2GB should be enabled. tls: Whether TLS should be used. unix: Whether a Unix domain socket should be used instead of TCP. If set to @@ -1339,7 +1470,8 @@ def __init__(self, method then you can provide the event loop here and the returned future object will be an instance of the loops future type. This will allow the current event loop to manage awaiting `#!python QFuture` objects as well as any other async tasks that - may be running. + may be running. If no event loop is provided the default result of + `aysncio.get_event_loop()` will be used. 
no_ctx: This parameter determines whether or not the context interface will be disabled. disabling the context interface will stop extra q queries being sent but will disable the extra features around the context interface. @@ -1364,12 +1496,6 @@ def __init__(self, the q server requires authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information. - Note: The `#!python timeout` argument may not always be enforced when making - successive queries. When making successive queries if one query times out the next query - will wait until a response has been received from the previous query before starting the - timer for its own timeout. This can be avoided by using a separate - `#!python QConnection` instance for each query. - Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used. Warning: AsyncQConnections will not resend queries that have not completed on reconnection. @@ -1428,22 +1554,32 @@ async def main(): 0 1 2 3 4 5 6 7 8 9 ``` """ + if timeout != 0.0: + warnings.warn('Timeout is not supported when using AsyncQConnection objects.') # TODO: Remove this once TLS support is fixed if tls: raise PyKXException('TLS is currently only supported for SyncQConnections') + loop = event_loop + if loop is None: + # `asyncio.get_event_loop()` used to do this automatically but it is deprecated as of + # Python 3.12, so we do this weird try - except to future proof. 
+ try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = asyncio.new_event_loop() object.__setattr__(self, '_stored_args', { 'host': host, 'port': port, 'args': args, 'username': username, 'password': password, - 'timeout': timeout, + 'timeout': 0.0, 'large_messages': large_messages, 'tls': tls, 'unix': unix, 'wait': wait, 'lock': lock, - 'loop': event_loop, + 'loop': loop, 'no_ctx': no_ctx, 'reconnection_attempts':reconnection_attempts, 'reconnection_delay': reconnection_delay, @@ -1528,6 +1664,7 @@ def __call__(self, wait: bool = True, reuse: bool = True, debug: bool = False, + async_response: bool = False, ) -> QFuture: """Evaluate a query on the connected q process over IPC. @@ -1547,6 +1684,11 @@ if using q queries that respond in a deferred/asynchronous manner this should be set to `#!python False` so the query can be made in a dedicated `#!python AsyncQConnection` instance. + async_response: When using `reuse=False` and `wait=False` if an asynchronous response is + expected you can use this argument to keep the connection alive until an + asynchronous response has been received. Awaiting the initial returned future object + will return a second future that you can await upon to receive the asynchronous + response. Returns: A QFuture object that can be awaited on to get the result of the query.
@@ -1606,6 +1748,10 @@ def __call__(self, await q(kx.q.floor, [5.2, 10.4]) ``` """ + if async_response and reuse: + warnings.warn('Cannot use async_response=True without reuse=False.') + if async_response and wait: + warnings.warn('Cannot use async_response=True without wait=False.') if not reuse: conn = _DeferredQConnection(self._stored_args['host'], self._stored_args['port'], @@ -1619,6 +1765,13 @@ def __call__(self, wait=self._stored_args['wait'], no_ctx=self._stored_args['no_ctx']) q_future = conn(query, *args, wait=wait, debug=debug) + if async_response and not wait: + q_future2 = QFuture(conn, conn._connection_info['timeout'], debug) + conn._call_stack.append(q_future2) + q_future2 = self._loop.create_task(q_future2.__async_await__()) + q_future2.add_done_callback(lambda x: conn.close()) + q_future.set_result(q_future2) + return q_future q_future.add_done_callback(lambda x: conn.close()) if self._loop is None: return q_future @@ -1758,7 +1911,7 @@ async def close(self) -> None: events = self._reader.select() for key, _mask in events: callback = key.data - callback()(key.fileobj) + callback[0]()(key.fileobj) object.__setattr__(self, 'closed', True) self._reader.unregister(self._sock) self._writer.unregister(self._sock) @@ -1830,7 +1983,7 @@ def close(self) -> None: events = self._reader.select() for key, _mask in events: callback = key.data - callback()(key.fileobj) + callback[0]()(key.fileobj) object.__setattr__(self, 'closed', True) self._reader.unregister(self._sock) self._writer.unregister(self._sock) @@ -1905,8 +2058,7 @@ def __init__(self, port: The port to which a connection is to be established. username: Username for q connection authorization. password: Password for q connection authorization. - timeout: Timeout for blocking socket operations in seconds. If set to 0, the socket - will be non-blocking. + timeout: Timeout is not supported when using `RawQConnection` objects. large_messages: Whether support for messages >2GB should be enabled. 
tls: Whether TLS should be used. unix: Whether a Unix domain socket should be used instead of TCP. If set to @@ -1916,11 +2068,12 @@ Python will wait for the q server to execute the query, and respond with the results. If `#!python False`, the q server will respond immediately to every query with generic null (`#!q ::`), then execute them at some point in the future. - event_loop: If running an event loop that supports the `#!python create_task()` method - then you can provide the event loop here and the returned future object will be an - instance of the loops future type. This will allow the current event loop to manage - awaiting `#!python QFuture` objects as well as any other async tasks that may be - running. + event_loop: If running an event loop that supports the `#!python create_task()` + method then you can provide the event loop here and the returned future object will + be an instance of the loops future type. This will allow the current event loop + to manage awaiting `#!python QFuture` objects as well as any other async tasks that + may be running. If no event loop is provided the default result of + `asyncio.get_event_loop()` will be used. no_ctx: This parameter determines whether or not the context interface will be disabled. disabling the context interface will stop extra q queries being sent but will disable the extra features around the context interface. @@ -1937,12 +2090,6 @@ server requires authorization. Refer to [ssl documentation](https://code.kx.com/q/kb/ssl/) for more information. - Note: The `#!python timeout` argument may not always be enforced when making successive - queries. When making successive queries if one query times out the next query will wait - until a response has been received from the previous query before starting the timer for - its own timeout. This can be avoided by using a separate `#!python QConnection` instance - for each query.
- Note: The overhead of calling `#!python clean_open_connections` is large. When running as a server you should ensure that `#!python clean_open_connections` is called fairly infrequently as the overhead of clearing all the dead connections can be @@ -1951,6 +2098,11 @@ Note: When querying KX Insights the `#!python no_ctx=True` keyword argument must be used. + Note: 3.1 Upgrade considerations + As of PyKX version 3.1 all QFuture objects returned from calls to `RawQConnection` + objects must be awaited to receive their results. Previously you could use just + `conn.poll_recv()` and then directly get the result with `future.result()`. + Raises: PyKXException: Using both tls and unix is not possible with a QConnection. @@ -1975,21 +2127,31 @@ await pykx.RawQConnection(port=5001, unix=True) ``` """ + if timeout != 0.0: + warnings.warn('Timeout is not supported when using RawQConnection objects.') # TODO: Remove this once TLS support is fixed if tls: raise PyKXException('TLS is currently only supported for SyncQConnections') + loop = event_loop + if loop is None: + # `asyncio.get_event_loop()` used to do this automatically but it is deprecated as of + # Python 3.12, so we do this weird try - except to future proof.
+ try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = asyncio.new_event_loop() object.__setattr__(self, '_stored_args', { 'host': host, 'port': port, 'args': args, 'username': username, 'password': password, - 'timeout': timeout, + 'timeout': 0.0, 'large_messages': large_messages, 'tls': tls, 'unix': unix, 'wait': wait, - 'loop': event_loop, + 'loop': loop, 'no_ctx': True if as_server else no_ctx, 'as_server': as_server, 'conn_gc_time': conn_gc_time, @@ -2130,9 +2292,12 @@ """ if not self._initialized: raise UninitializedConnection() - res = QFuture(self, self._connection_info['timeout'], debug) + fut = QFuture(self, self._connection_info['timeout'], debug) + res = self._loop.create_task( + fut.__async_await__() + ) self._send_stack.append({'query': query, 'args': args, 'wait': wait, 'debug': debug}) - self._call_stack.append(res) + self._call_stack.append(fut) return res def _call(self, @@ -2312,7 +2477,7 @@ def _poll_server(self, amount: int = 1): # noqa events = reader.select(timeout) for key, _ in events: callback = key.data - res = callback()(key.fileobj) + res = callback[0]()(key.fileobj) if res is None: count -= 1 if count > 1: @@ -2342,7 +2507,7 @@ elif MessageType.async_msg.value == msg_type: print(e) if MessageType.sync_msg.value == msg_type: - callback()(key.fileobj, res, level) + callback[0]()(key.fileobj, res, level) count -= 1 if count > 1: return @@ -2399,6 +2564,83 @@ def poll_recv_async(self): return self._loop.create_task(q_future.__async_await__()) return q_future + async def poll_recv2(self, amount: int = 1, fut: Optional[QFuture] = None): + """Receive queries from the process connected to over IPC. + + Parameters: + amount: The number of receive requests to handle, defaults to one, if 0 is used then + all currently waiting responses will be received.
+ + Raises: + QError: Query timed out, may be raised if the time taken to make or receive a query goes + over the timeout limit. + + Examples: + + ```python + q = await pykx.RawQConnection(host='localhost', port=5002) + ``` + + Receive a single queued message. + + ```python + q_fut = q('til 10') # not sent yet + q.poll_send() # message is sent + q.poll_recv() # message response is received + ``` + + Receive two queued messages. + + ```python + q_fut = q('til 10') # not sent yet + q_fut2 = q('til 10') # not sent yet + q.poll_send(2) # messages are sent + q.poll_recv(2) # message responses are received + ``` + + Receive all queued messages. + + ```python + q_fut = q('til 10') # not sent yet + q_fut2 = q('til 10') # not sent yet + q.poll_send(0) # all messages are sent + q.poll_recv(0) # all message responses are received + ``` + """ + if fut is not None and fut.done(): + return fut.result() + count = amount + timeout = self._connection_info['timeout'] + if self._stored_args['as_server']: + self._poll_server(amount) + else: + last = None + if count == 0: + count = len(self._call_stack) if len(self._call_stack) > 0 else 1 + while count >= 0: + if fut is not None and fut.done(): + return fut.result() + start_time = monotonic_ns() + with self._lock if self._lock is not None else nullcontext(): + if timeout != 0.0 and monotonic_ns() - start_time >= (timeout * 1000000000): + self._timeouts += 1 + raise QError('Query timed out') + events = self._reader.select(timeout) + if len(events) != 0: + for key, _ in events: + callback = key.data + res = callback[0]()(key.fileobj) + res = res[1] if isinstance(res, tuple) else res + if count == 1: + return res + count -= 1 + last = res + else: + if count == 1: + return last + count -= 1 + return last + def poll_recv(self, amount: int = 1): """Recieve queries from the process connected to over IPC. 
@@ -2460,7 +2702,7 @@ def poll_recv(self, amount: int = 1): if len(events) != 0: for key, _ in events: callback = key.data - res = callback()(key.fileobj) + res = callback[0]()(key.fileobj) res = res[1] if isinstance(res, tuple) else res if count == 1: return res @@ -2507,7 +2749,7 @@ async def close(self) -> None: events = self._reader.select() for key, _mask in events: callback = key.data - callback(key.fileobj) + callback[0](key.fileobj) self._reader.unregister(self._sock) self._writer.unregister(self._sock) self._reader.close() diff --git a/src/pykx/lib/.gitignore b/src/pykx/lib/.gitignore new file mode 100644 index 0000000..0af2287 --- /dev/null +++ b/src/pykx/lib/.gitignore @@ -0,0 +1,12 @@ +*.q_ +*.k_ +**/*q.so +**/*q.dylib +**/q.dll +**/q.lib +**/kxreaper +**/libkurl.so +**/libobjstor.so +**/pg +pykx.q +pykx* diff --git a/src/pykx/lib/4-1-libs/.gitignore b/src/pykx/lib/4-1-libs/.gitignore new file mode 100644 index 0000000..0af2287 --- /dev/null +++ b/src/pykx/lib/4-1-libs/.gitignore @@ -0,0 +1,12 @@ +*.q_ +*.k_ +**/*q.so +**/*q.dylib +**/q.dll +**/q.lib +**/kxreaper +**/libkurl.so +**/libobjstor.so +**/pg +pykx.q +pykx* diff --git a/src/pykx/lib/4-1-libs/l64/.gitkeep b/src/pykx/lib/4-1-libs/l64/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/pykx/lib/4-1-libs/l64arm/.gitkeep b/src/pykx/lib/4-1-libs/l64arm/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/pykx/lib/4-1-libs/m64/.gitkeep b/src/pykx/lib/4-1-libs/m64/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/pykx/lib/4-1-libs/m64arm/.gitkeep b/src/pykx/lib/4-1-libs/m64arm/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/pykx/lib/4-1-libs/w64/.gitkeep b/src/pykx/lib/4-1-libs/w64/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/pykx/lib/l64/p.so b/src/pykx/lib/l64/p.so deleted file mode 100755 index e97455b..0000000 Binary files a/src/pykx/lib/l64/p.so and /dev/null differ diff --git 
a/src/pykx/lib/m64/p.so b/src/pykx/lib/m64/p.so deleted file mode 100755 index 76e9088..0000000 Binary files a/src/pykx/lib/m64/p.so and /dev/null differ diff --git a/src/pykx/lib/m64arm/p.so b/src/pykx/lib/m64arm/p.so deleted file mode 100755 index 76e9088..0000000 Binary files a/src/pykx/lib/m64arm/p.so and /dev/null differ diff --git a/src/pykx/license.py b/src/pykx/license.py index 61de202..8a18c03 100644 --- a/src/pykx/license.py +++ b/src/pykx/license.py @@ -62,7 +62,7 @@ def check(license: str, >>> import pykx as kx >>> check = kx.license.check('/usr/location/kc.lic') Supplied license information does not match. - Please consider reinstalling your license using pykx.util.install_license + Please consider reinstalling your license using pykx.license.install Installed license representation: b'iIXSiEWzCNTkkCWK5Gggy..' @@ -78,7 +78,7 @@ def check(license: str, >>> import pykx as kx >>> check = kx.license.check('setup.py', license_type='kc.lic') Unable to find an installed license: kc.lic at location: /usr/local/anaconda3/envs/qenv/q. 
- Please consider installing your license again using pykx.util.install_license + Please consider installing your license again using pykx.license.install >>> check False ``` @@ -97,7 +97,7 @@ def check(license: str, if not license_located: print(f'Unable to find an installed license: {license_type} at location: {str(qlic)}.\n' - 'Please consider installing your license again using pykx.util.install_license') + 'Please consider installing your license again using pykx.license.install') return False with open(installed_lic, 'rb') as f: @@ -121,7 +121,7 @@ def check(license: str, if not license_content == license: print('Supplied license information does not match.\n' - 'Please consider reinstalling your license using pykx.util.install_license\n\n' + 'Please consider reinstalling your license using pykx.license.install\n\n' f'Installed license representation:\n{license_content}\n\n' f'User expected license representation:\n{license}') return False diff --git a/src/pykx/pandas_api/pandas_indexing.py b/src/pykx/pandas_api/pandas_indexing.py index 2ae9c69..7d76ce3 100644 --- a/src/pykx/pandas_api/pandas_indexing.py +++ b/src/pykx/pandas_api/pandas_indexing.py @@ -2,8 +2,6 @@ from ..exceptions import QError from . 
import api_return, MetaAtomic -import warnings - def _init(_q): global q @@ -435,13 +433,8 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index return t - def rename(self, labels=None, index=None, columns=None, axis=0, - copy=None, inplace=False, level=None, errors='ignore', mapper=None): - if labels is not None: - warnings.warn("Keyword 'labels' is deprecated please use 'mapper'", - DeprecationWarning) - if mapper is None: - mapper = labels + def rename(self, mapper=None, index=None, columns=None, axis=0, + copy=None, inplace=False, level=None, errors='ignore'): if ("Keyed" not in str(type(self)) and columns is None and ((axis == 'index' or axis == 0) or (index is not None))): raise ValueError("Can only rename index of a KeyedTable") diff --git a/src/pykx/pykx.q b/src/pykx/pykx.q index 752b559..c7e48b2 100644 --- a/src/pykx/pykx.q +++ b/src/pykx/pykx.q @@ -4,21 +4,20 @@ // @category api // @end +if[`e in key`.p; + if[not"{.pykx.pyexec x}"~string get `.p.e; + -1"Warning: Detected invalid '.p.e' function definition expected for PyKX.\n", + "Have you loaded another Python integration first?\n\n", + "Please consider full installation of PyKX under q following instructions at:\n", + "https://code.kx.com/pykx/pykx-under-q/intro.html#install.\n"; + '"Unable to load PyKX, see logged output for more information" + ] + ] + // @private // @desc Process context prior to PyKX initialization .pykx.util.prevCtx:system"d"; -@[ - {if[not"{.pykx.pyexec x}"~string get x; - -1"Warning: Detected invalid '.p.e' function definition expected for PyKX.\n", - "Have you loaded another Python integration first?\n\n", - "Please consider full installation of PyKX under q following instructions at:\n", - "https://code.kx.com/pykx/pykx-under-q/intro.html#installation.\n" - ] - }; - `.p.e; - {::}] - \d .pykx // @private @@ -65,7 +64,7 @@ if[not "true"~lower getenv`PYKX_LOADED_UNDER_Q; if[not count pykxDir:getenv`PYKX_DIR; util.dirSysCall:{ret:system x," 
",util.dirCommand;util.whichPython:x;ret}; pykxDir:$[count util.whichPython;util.dirSysCall[util.whichPython]; - @[util.dirSysCall;"python";{util.dirSysCall["python3"]}] + @[util.dirSysCall;"python3";{util.dirSysCall["python"]}] ]; pykxDir:ssr[;"\\";"/"]last vs["PYKX_DIR: "]last pykxDir ]; @@ -177,11 +176,12 @@ util.ispd :util.isch[`..pandas] util.ispa :util.isch[`..pyarrow] util.isk :util.isch[`..k] util.israw :util.isch[`..raw] +util.ispt :util.isch[`..torch] // @private // @desc // Determine if a supplied object requires conversion -util.isconv:{any(util.ispy;util.isnp;util.ispd;util.ispa;util.isk;util.israw)@\:x} +util.isconv:{any(util.ispy;util.isnp;util.ispd;util.ispa;util.isk;util.israw;util.ispt)@\:x} // @private // @desc Convert a supplied argument to the specified python object type @@ -195,6 +195,7 @@ util.convertArg:{ util.ispd x 0; [.z.s[(x[0][::][1]; 3; x[2])]]; util.ispa x 0; [.z.s[(x[0][::][1]; 4; x[2])]]; util.isk x 0; [.z.s[(x[0][::][1]; 5; x[2])]]; + util.ispt x 0; [.z.s[(x[0][::][1]; 6; x[2])]]; util.israw x 0; [.z.s[(x[0][::][1]; x[1]; 1b)]]; '"Unsupported conversion attempted" ] @@ -213,6 +214,7 @@ util.toDefault:{ "k" ~ util.defaultConv;tok; "raw" ~ util.defaultConv;toraw; "default" ~ util.defaultConv;todefault; + "pt" ~ util.defaultConv;topt; (::) ]x }; @@ -479,12 +481,12 @@ util.parseArgs:{ // enlist[`..python;;][0 1 2 3 4 5 6 7 8 9] // // // Pass a q object to Python with default conversions and return type -// q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 -// +// q).pykx.typepy til 10 +// "" // // // Pass a q object to Python treating the Python object as a Python Object -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topy til 10 -// +// q).pykx.typepy .pykx.topy til 10 +// "" // ``` topy:{x y}(`..python;;) @@ -518,12 +520,12 @@ topy:{x y}(`..python;;) // q).pykx.util.defaultConv:"py" // // // Pass a q object to Python with default conversions and return type -// q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 -// +// 
q).pykx.typepy til 10 +// "" // // // Pass a q object to Python treating the Python object as a Numpy Object -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tonp til 10 -// +// q).pykx.typepy .pykx.tonp til 10 +// "" // ``` tonp:{x y}(`..numpy;;) @@ -555,12 +557,12 @@ tonp:{x y}(`..numpy;;) // // // // Pass a q object to Python with default conversions and return type -// q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 -// +// q).pykx.typepy til 10 +// "" // // // Pass a q object to Python treating the Python object as a Pandas Object -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topd til 10 -// +// q).pykx.typepy .pykx.topd til 10 +// "" // ``` topd:{x y}(`..pandas;;) @@ -591,15 +593,54 @@ topd:{x y}(`..pandas;;) // enlist[`..pyarrow;;][0 1 2 3 4 5 6 7 8 9] // // // Pass a q object to Python with default conversions and return type -// q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 -// +// q).pykx.typepy til 10 +// "" // // // Pass a q object to Python treating the Python object as a PyArrow Object -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.topa til 10 -// +// q).pykx.typepy .pykx.topa til 10 +// "" // ``` topa:{x y}(`..pyarrow;;) +// @name .pykx.topt +// @category api +// @overview +// _Tag a q object to be indicate conversion to a PyTorch object when called in Python (BETA)_ +// +// ```q +// .pykx.topt[qObject] +// ``` +// +// **Parameters:** +// +// name | type | description | +// ----------|---------|-------------| +// `qObject` | `any` | A q object which is to be defined as a PyTorch object in Python. | +// +// **Return:** +// +// type | description +// -------------|------------ +// `projection` | A projection which is used to indicate that once the q object is passed to Python for evaluation is should be treated as a Torch type object. 
| +// +// ```q +// // Denote that a q object once passed to Python should be managed as a NumPy object +// q).pykx.topt til 10 +// enlist[`..torch;;][0 1 2 3 4 5 6 7 8 9] +// +// // Update the default conversion type to be non NumPy +// q).pykx.setdefault"pt" +// +// // Pass a q object to Python with default conversions and return type +// q).pykx.typepy til 10 +// "" +// +// // Pass a q object to Python treating the Python object as a Numpy Object +// q).pykx.typepy .pykx.tonp til 10 +// "" +// ``` +topt:{x y}(`..torch;;) + // @name .pykx.tok // @category api // @overview @@ -627,12 +668,12 @@ topa:{x y}(`..pyarrow;;) // enlist[`..k;;][0 1 2 3 4 5 6 7 8 9] // // // Pass a q object to Python with default conversions and return type -// q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 -// +// q).pykx.typepy til 10 +// "" // // // Pass a q object to Python treating the Python object as a PyKX object -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.tok til 10 -// +// q).pykx.typepy .pykx.tok til 10 +// "" // ``` tok: {x y}(`..k;;) @@ -663,12 +704,12 @@ tok: {x y}(`..k;;) // enlist[`..raw;;][0 1 2 3 4 5 6 7 8 9] // // // Pass a q object to Python with default conversions and return type -// q).pykx.print .pykx.eval["lambda x: type(x)"]til 10 -// +// q).pykx.typepy til 10 +// "" // // // Pass a q object to Python treating the Python object as a raw Object -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.toraw til 10 -// +// q).pykx.typepy .pykx.toraw til 10 +// "" // ``` toraw: {x y}(`..raw;;) @@ -703,12 +744,12 @@ toraw: {x y}(`..raw;;) // enlist[`..numpy;;][0 1 2 3 4 5 6 7 8 9] // // // Pass a q list to Python treating the Python object as PyKX default -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault (til 10;til 10) -// +// q).pykx.typepy .pykx.todefault (til 10;til 10) +// "" // // // Pass a q Table to Python by default treating the Python table as a Pandas DataFrame -// q).pykx.print .pykx.eval["lambda x: type(x)"] .pykx.todefault 
([]til 10;til 10) -// +// q).pykx.typepy .pykx.todefault ([]til 10;til 10) +// "" // ``` todefault:{$[0h=type x;topy x;$[99h~type x;all 98h=type each(key x;value x);0b]|98h=type x;topd x;tonp x]} @@ -889,8 +930,23 @@ setdefault:{ // foreign // q).pykx.toq b // 2 -// ``` -py2q:toq:{$[type[x]in 104 105 112h;util.foreignToq[unwrap x;0b];x]} +// +// // Convert a PyKX conversion object back to q +// q).pykx.toq .pykx.topd ([]5?1f;5?`a`b`c) +// +// x x1 +// ------------ +// 0.3017723 a +// 0.785033 a +// 0.5347096 c +// 0.7111716 b +// 0.411597 c +// ``` +py2q:toq:{ + x:{$[util.isconv x;last value::;]x}/[x]; + if[type[x]in 104 105 112h;x:unwrap x]; + $[type[x]=112h;util.foreignToq[unwrap x;0b];x] + } // @kind function // @name .pykx.toq0 @@ -942,7 +998,9 @@ toq0:ce { [if[not -1h~type x 1;'"Supplied 2nd argument must be a boolean"]; fn:x 0;conv:x 1]; [fn:x 0;conv:0b]]; - $[type[fn]in 104 105 112h;util.foreignToq[unwrap fn;conv];fn] + fn:{$[util.isconv x;last value::;]x}/[fn]; + if[type[fn]in 104 105 112h;fn:unwrap fn]; + $[type[fn]=112h;util.foreignToq[fn;conv];fn] } // @private @@ -1703,7 +1761,10 @@ safeReimport:{[x] loadPy:{[file] if[10h<>type file;'"Parameter 'file' must be of type string"]; if[not last["." vs file]in("py";enlist"p");'"File extension must be .py/.p"]; - .pykx.pyexec"exec(open('",ssr[file;"\\";"\\\\"],"').read())" + @[.pykx.pyexec; + "exec(open('",ssr[file;"\\";"\\\\"],"').read())"; + {$[y like "FileNotFound*";'ssr[y;"2";x];'x]}[file] + ] } @@ -1875,6 +1936,44 @@ loadExtension:{[ext] ]; } +// @private +// @kind function +// @name .pykx.typepy +// @category api +// @overview +// _Determine the datatype of an object passed to Python and return it as a string_ +// +// ```q +// .pykx.typepy[object] +// ``` +// +// **Parameters:** +// +// name | type | description +// -----------|----------|------------- +// `object` | `any` | An object that is passed to python and its datatype determined. 
+// +// **Returns:** +// +// type | description +// ---------|------------ +// `string` | The string representation of an objects datatype after being passed to python +// +// **Example:** +// +// ```q +// q)\l pykx.q +// q).pykx.typepy 1 +// "" +// +// q).pykx.typepy (10?1f;10?1f) +// "" +// +// q).pykx.typepy ([]100?1f;100?1f) +// "" +// ``` +.pykx.typepy:{.pykx.eval["lambda x: str(type(x)).encode()";<]x} + // @desc Restore context used at initialization of script system"d ",string .pykx.util.prevCtx; diff --git a/src/pykx/pykxq.c b/src/pykx/pykxq.c index 9f61fd4..32a544d 100644 --- a/src/pykx/pykxq.c +++ b/src/pykx/pykxq.c @@ -40,7 +40,6 @@ static P M, errfmt; static void** N; int pykx_flag = -1; -bool pykx_threading = false; // Equivalent to starting Python with the `-S` flag. Allows us to edit some global config variables // before `site.main()` is called. @@ -68,8 +67,6 @@ static int check_py_foreign(K x){return x->t==112 && x->n==2 && *kK(x)==(K)py_de EXPORT K k_check_python(K x){return kb(check_py_foreign(x));} EXPORT K k_pykx_init(K k_q_lib_pat, K _pykx_threading) { - if (_pykx_threading->g) - pykx_threading = true; PyGILState_STATE gstate; gstate = PyGILState_Ensure(); diff --git a/src/pykx/lib/l64/kxreaper b/src/pykx/q.so/libs/4-0/l64/kxreaper similarity index 100% rename from src/pykx/lib/l64/kxreaper rename to src/pykx/q.so/libs/4-0/l64/kxreaper diff --git a/src/pykx/lib/l64/libkurl.so b/src/pykx/q.so/libs/4-0/l64/libkurl.so similarity index 100% rename from src/pykx/lib/l64/libkurl.so rename to src/pykx/q.so/libs/4-0/l64/libkurl.so diff --git a/src/pykx/lib/l64/libobjstor.so b/src/pykx/q.so/libs/4-0/l64/libobjstor.so similarity index 100% rename from src/pykx/lib/l64/libobjstor.so rename to src/pykx/q.so/libs/4-0/l64/libobjstor.so diff --git a/src/pykx/lib/l64/libq.so b/src/pykx/q.so/libs/4-0/l64/libq.so similarity index 100% rename from src/pykx/lib/l64/libq.so rename to src/pykx/q.so/libs/4-0/l64/libq.so diff --git a/src/pykx/lib/l64/pg 
b/src/pykx/q.so/libs/4-0/l64/pg similarity index 100% rename from src/pykx/lib/l64/pg rename to src/pykx/q.so/libs/4-0/l64/pg diff --git a/src/pykx/lib/l64/symbols.txt b/src/pykx/q.so/libs/4-0/l64/symbols.txt similarity index 100% rename from src/pykx/lib/l64/symbols.txt rename to src/pykx/q.so/libs/4-0/l64/symbols.txt diff --git a/src/pykx/lib/l64arm/libq.so b/src/pykx/q.so/libs/4-0/l64arm/libq.so similarity index 100% rename from src/pykx/lib/l64arm/libq.so rename to src/pykx/q.so/libs/4-0/l64arm/libq.so diff --git a/src/pykx/lib/m64/libq.dylib b/src/pykx/q.so/libs/4-0/m64/libq.dylib similarity index 100% rename from src/pykx/lib/m64/libq.dylib rename to src/pykx/q.so/libs/4-0/m64/libq.dylib diff --git a/src/pykx/lib/m64arm/libq.dylib b/src/pykx/q.so/libs/4-0/m64arm/libq.dylib similarity index 100% rename from src/pykx/lib/m64arm/libq.dylib rename to src/pykx/q.so/libs/4-0/m64arm/libq.dylib diff --git a/src/pykx/lib/w64/q.dll b/src/pykx/q.so/libs/4-0/w64/q.dll similarity index 100% rename from src/pykx/lib/w64/q.dll rename to src/pykx/q.so/libs/4-0/w64/q.dll diff --git a/src/pykx/lib/w64/q.lib b/src/pykx/q.so/libs/4-0/w64/q.lib similarity index 100% rename from src/pykx/lib/w64/q.lib rename to src/pykx/q.so/libs/4-0/w64/q.lib diff --git a/src/pykx/q.so/libs/4-1/l64/kxreaper b/src/pykx/q.so/libs/4-1/l64/kxreaper new file mode 100644 index 0000000..79151c2 Binary files /dev/null and b/src/pykx/q.so/libs/4-1/l64/kxreaper differ diff --git a/src/pykx/q.so/libs/4-1/l64/libkurl.so b/src/pykx/q.so/libs/4-1/l64/libkurl.so new file mode 100644 index 0000000..6f44fb4 Binary files /dev/null and b/src/pykx/q.so/libs/4-1/l64/libkurl.so differ diff --git a/src/pykx/q.so/libs/4-1/l64/libobjstor.so b/src/pykx/q.so/libs/4-1/l64/libobjstor.so new file mode 100644 index 0000000..828d6ad Binary files /dev/null and b/src/pykx/q.so/libs/4-1/l64/libobjstor.so differ diff --git a/src/pykx/lib/4-1-libs/l64/libq.so b/src/pykx/q.so/libs/4-1/l64/libq.so similarity index 100% rename from 
src/pykx/lib/4-1-libs/l64/libq.so rename to src/pykx/q.so/libs/4-1/l64/libq.so diff --git a/src/pykx/q.so/libs/4-1/l64/pg b/src/pykx/q.so/libs/4-1/l64/pg new file mode 100644 index 0000000..f5989b0 Binary files /dev/null and b/src/pykx/q.so/libs/4-1/l64/pg differ diff --git a/src/pykx/lib/4-1-libs/l64arm/libq.so b/src/pykx/q.so/libs/4-1/l64arm/libq.so similarity index 100% rename from src/pykx/lib/4-1-libs/l64arm/libq.so rename to src/pykx/q.so/libs/4-1/l64arm/libq.so diff --git a/src/pykx/lib/4-1-libs/m64/libq.dylib b/src/pykx/q.so/libs/4-1/m64/libq.dylib similarity index 100% rename from src/pykx/lib/4-1-libs/m64/libq.dylib rename to src/pykx/q.so/libs/4-1/m64/libq.dylib diff --git a/src/pykx/lib/4-1-libs/m64arm/libq.dylib b/src/pykx/q.so/libs/4-1/m64arm/libq.dylib similarity index 100% rename from src/pykx/lib/4-1-libs/m64arm/libq.dylib rename to src/pykx/q.so/libs/4-1/m64arm/libq.dylib diff --git a/src/pykx/lib/4-1-libs/w64/q.dll b/src/pykx/q.so/libs/4-1/w64/q.dll similarity index 100% rename from src/pykx/lib/4-1-libs/w64/q.dll rename to src/pykx/q.so/libs/4-1/w64/q.dll diff --git a/src/pykx/lib/4-1-libs/w64/q.lib b/src/pykx/q.so/libs/4-1/w64/q.lib similarity index 100% rename from src/pykx/lib/4-1-libs/w64/q.lib rename to src/pykx/q.so/libs/4-1/w64/q.lib diff --git a/src/pykx/lib/bq.q_ b/src/pykx/q.so/qk/bq.q_ similarity index 100% rename from src/pykx/lib/bq.q_ rename to src/pykx/q.so/qk/bq.q_ diff --git a/src/pykx/lib/kurl.q_ b/src/pykx/q.so/qk/kurl.q_ similarity index 100% rename from src/pykx/lib/kurl.q_ rename to src/pykx/q.so/qk/kurl.q_ diff --git a/src/pykx/lib/kurl.sidecar.q_ b/src/pykx/q.so/qk/kurl.sidecar.q_ similarity index 100% rename from src/pykx/lib/kurl.sidecar.q_ rename to src/pykx/q.so/qk/kurl.sidecar.q_ diff --git a/src/pykx/lib/licmet.q_ b/src/pykx/q.so/qk/licmet.q_ similarity index 100% rename from src/pykx/lib/licmet.q_ rename to src/pykx/q.so/qk/licmet.q_ diff --git a/src/pykx/lib/objstor.q_ b/src/pykx/q.so/qk/objstor.q_ similarity 
index 100% rename from src/pykx/lib/objstor.q_ rename to src/pykx/q.so/qk/objstor.q_ diff --git a/src/pykx/q.so/qk/pykx_init.q_ b/src/pykx/q.so/qk/pykx_init.q_ new file mode 100644 index 0000000..e121a7f Binary files /dev/null and b/src/pykx/q.so/qk/pykx_init.q_ differ diff --git a/src/pykx/lib/qlog.q_ b/src/pykx/q.so/qk/qlog.q_ similarity index 100% rename from src/pykx/lib/qlog.q_ rename to src/pykx/q.so/qk/qlog.q_ diff --git a/src/pykx/lib/rest.q_ b/src/pykx/q.so/qk/rest.q_ similarity index 100% rename from src/pykx/lib/rest.q_ rename to src/pykx/q.so/qk/rest.q_ diff --git a/src/pykx/lib/s.k_ b/src/pykx/q.so/qk/s.k_ similarity index 100% rename from src/pykx/lib/s.k_ rename to src/pykx/q.so/qk/s.k_ diff --git a/src/pykx/query.py b/src/pykx/query.py index f3a6e3c..08e1b43 100644 --- a/src/pykx/query.py +++ b/src/pykx/query.py @@ -1,6 +1,7 @@ """_This page documents query interfaces for querying q tables using PyKX._""" from abc import ABCMeta +import asyncio from typing import Any, Dict, List, Optional, Union from . import Q @@ -384,7 +385,7 @@ def _seud(self, table, query_type, columns=None, where=None, by=None, inplace=Fa wait=True, ) if inplace and isinstance(table, k.K): - if isinstance(res, QFuture): + if isinstance(res, QFuture) or isinstance(res, asyncio.Task): raise QError("'inplace' not supported with asynchronous query") if type(table) != type(res): raise QError('Returned data format does not match input type, ' diff --git a/src/pykx/tick.py b/src/pykx/tick.py index bb088ca..e7b31ce 100644 --- a/src/pykx/tick.py +++ b/src/pykx/tick.py @@ -273,6 +273,48 @@ def set_timer(self, timer: int = 1000) -> None: """ self._connection('{system"t ",string[x]}', timer) + def set_tables(self, tables: dict, tick: bool = False) -> None: + """ + Define the tables to be available to the process being initialized. 
+ + Parameters: + tables: A dictionary mapping the name of a table to be defined on + the process to the table schema + tick: Is the process you are setting the table on a tickerplant? + + Returns: + On a process persist the table schema as the supplied name + + Example: + + Set a table 'trade' with a supplied schema on a tickerplant process + + ```python + >>> import pykx as kx + >>> trade = kx.schema.builder({ + ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom, + ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom, + ... 'px': kx.FloatAtom}) + >>> tick = kx.tick.TICK(port=5030) + >>> tick.set_tables({'trade': trade}) + >>> tick('trade') + pykx.Table(pykx.q(' + time sym exchange sz px + ----------------------- + ')) + ``` + """ + for key, value in tables.items(): + if not isinstance(key, str): + raise QError('Provided table name must be an "str"') + if not isinstance(value, k.Table): + raise QError('Provided table schema must be an "kx.Table"') + if tick: + if not q('~', ['time', 'sym'], value.columns[:2]): + raise QError("'time' and 'sym' must be first two columns " + f"in Table: {key}") + self._connection('.tick.set_tables', key, value) + class TICK(STREAMING): """ @@ -490,15 +532,7 @@ def set_tables(self, tables: dict) -> None: ')) ``` """ - for key, value in tables.items(): - if not isinstance(key, str): - raise QError('Provided table name must be an "str"') - if not isinstance(value, k.Table): - raise QError('Provided table schema must be an "kx.Table"') - if not q('~', ['time', 'sym'], value.columns[:2]): - raise QError("'time' and 'sym' must be first two columns " - f"in Table: {key}") - self._connection('.tick.set_tables', key, value) + super().set_tables(tables, tick=True) def set_snap(self, snap_function: Callable) -> None: """ @@ -586,6 +620,8 @@ class RTP(STREAMING): init_args: A list of arguments passed to the initialized q process at startup denoting the command line options to be used for the initialized q process see 
[here](https://code.kx.com/q/basics/cmdline/) for a full breakdown. + tables: A dictionary mapping the names of tables and their schemas which can be + used to define the tables available to the real-time processor. Returns: On successful initialisation will initialise the RTP process and set @@ -698,10 +734,12 @@ def __init__(self, vanilla: bool = True, pre_processor: Callable = None, post_processor: Callable = None, - init_args: list = None) -> None: + init_args: list = None, + tables: dict = None) -> None: self._subscriptions=subscriptions self._pre_processor=pre_processor self._post_processor=post_processor + self._tables = tables self._vanilla = vanilla self._name = 'Real-time' @@ -720,6 +758,8 @@ def __init__(self, self.post_processor(post_processor) if subscriptions is not None: self.subscriptions(subscriptions) + if isinstance(tables, dict): + self.set_tables(tables) except BaseException as err: print(f'{self._name} processor failed to initialise on port: {port}\n') if self._connection is not None: @@ -851,7 +891,8 @@ def restart(self) -> None: apis=self._apis, vanilla=self._vanilla, pre_processor=self._pre_processor, - post_processor=self._post_processor) + post_processor=self._post_processor, + tables=self._tables) if self._init_config is not None: self.init(config=self._init_config) print(f'{self._name} processor on port {self._port} successfully restarted\n') @@ -984,6 +1025,40 @@ def post_processor(self, function: Callable) -> None: function.__name__, '.tick.RTPPostProc') + def set_tables(self, tables: dict) -> None: + """ + Define tables to be available on the RTP processes. + + Parameters: + tables: A dictionary mapping the name of a table to be defined on + the process to the table schema + + Returns: + On the RTP persist the table schemas as the supplied name + + Example: + + Set a table 'trade' with a supplied schema on a tickerplant process + + ```python + >>> import pykx as kx + >>> prices = kx.schema.builder({ + ... 
'time': kx.TimespanAtom , 'sym': kx.SymbolAtom, + ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom, + ... 'px': kx.FloatAtom}) + >>> rte = kx.tick.RTP(port=5034, + ... subscriptions = ['trade', 'quote'], + ... vanilla=False) + >>> rte.set_tables({'prices': prices}) + >>> rte('prices') + pykx.Table(pykx.q(' + time sym exchange sz px + ----------------------- + ')) + ``` + """ + super().set_tables(tables) + def subscriptions(self, sub_list): self._connection('{.tick.subscriptions:x}', sub_list) @@ -1007,6 +1082,8 @@ class HDB(STREAMING): init_args: A list of arguments passed to the initialized q process at startup denoting the command line options to be used for the initialized q process see [here](https://code.kx.com/q/basics/cmdline/) for a full breakdown. + tables: A dictionary mapping the names of tables and their schemas which can be + used to define the tables available to the HDB. Returns: On successful initialisation will initialise the HDB process and set @@ -1049,10 +1126,12 @@ def __init__(self, process_logs: Union[str, bool] = True, libraries: dict = None, apis: dict = None, - init_args: list = None): + init_args: list = None, + tables: dict = None): self._name = 'HDB' self._libraries = libraries self._apis = apis + self._tables = tables print(f'Initialising {self._name} process on port: {port}') try: super().__init__(port, @@ -1061,6 +1140,8 @@ def __init__(self, libraries=libraries, init_args=init_args) self._connection('.pykx.loadExtension["hdb"]') + if isinstance(tables, dict): + super().set_tables(tables) except BaseException as err: print(f'{self._name} failed to initialise on port: {port}\n') if self._connection is not None: @@ -1146,11 +1227,46 @@ def restart(self) -> None: self.__init__(port=self._port, process_logs=self._process_logs, libraries=self._libraries, - apis=self._apis) + apis=self._apis, + tables=self._tables) if self._init_config is not None: self.init(self._database, self._init_config) print(f'{self._name} on port {self._port} 
successfully restarted\n') + def set_tables(self, tables: dict) -> None: + """ + Define tables to be available on the HDB processes. + + Parameters: + tables: A dictionary mapping the name of a table to be defined on + the process to the table schema + + Returns: + On the HDB persist the table schemas as the supplied name + + Example: + + Set a table 'prices' with a supplied schema on a HDB process + + ```python + >>> import pykx as kx + >>> prices = kx.schema.builder({ + ... 'time': kx.TimespanAtom , 'sym': kx.SymbolAtom, + ... 'exchange': kx.SymbolAtom, 'sz': kx.LongAtom, + ... 'px': kx.FloatAtom}) + >>> hdb = kx.tick.HDB(port=5035) + Initialising HDB process on port: 5035 + HDB process initialised successfully on port: 5035 + >>> hdb.set_tables({'prices': prices}) + >>> hdb('prices') + pykx.Table(pykx.q(' + time sym exchange sz px + ----------------------- + ')) + ``` + """ + super().set_tables(tables) + class GATEWAY(STREAMING): """ diff --git a/src/pykx/toq.pyx b/src/pykx/toq.pyx index a35b5bf..6c1278e 100644 --- a/src/pykx/toq.pyx +++ b/src/pykx/toq.pyx @@ -104,17 +104,21 @@ from . import wrappers as k from ._pyarrow import pyarrow as pa from .cast import * from . 
import config -from .config import find_core_lib, k_allocator, licensed, pandas_2, system +from .config import beta_features, find_core_lib, k_allocator, licensed, pandas_2, system from .constants import NULL_INT16, NULL_INT32, NULL_INT64 from .constants import INF_INT16, INF_INT32, INF_INT64, INF_NEG_INT16, INF_NEG_INT32, INF_NEG_INT64 from .exceptions import LicenseException, PyArrowUnavailable, PyKXException, QError from .util import df_from_arrays, slice_to_range +import importlib.util +_torch_unavailable = importlib.util.find_spec('torch') is None + __all__ = [ 'from_arrow', 'from_bytes', 'from_callable', + 'from_datetime', 'from_datetime_date', 'from_datetime_time', 'from_datetime_datetime', @@ -185,7 +189,7 @@ np_float32_types = ( # 64-bits floating-point types to convert to q Float np_float64_types = ( - np.double, np.longdouble, np.float64, np.float_ + np.double, np.longdouble, np.float64 ) np_float_types = np_float32_types + np_float64_types @@ -1821,7 +1825,7 @@ def from_pandas_categorical(x: pd.Categorical, x.categories) ENUMS.append(name) else: - res = q(f"{{if[any not y in {name}; `cast]; `{name}$y@x}}", + res = q(f"{{if[any not y in {name}; `cast]; `{name}?y@x}}", x.codes.astype('int32'), x.categories) return res @@ -2097,7 +2101,6 @@ def from_datetime_datetime(x: Any, raise TypeError("Cast must be of type Boolean") if (cast is None or cast) and type(x) is not datetime.datetime: x = cast_to_python_datetime(x) - cdef core.K kx epoch = datetime.datetime(2000, 1, 1) if ktype is None or ktype is k.TimestampAtom: @@ -2300,6 +2303,37 @@ def from_numpy_timedelta64(x: np.timedelta64, return factory(kx, False) +def from_datetime(x: Any, + ktype: Optional[KType] = None, + *, + cast: bool = False, + handle_nulls: bool = False, + strings_as_char: bool = False, +) -> k.TemporalFixedAtom: + """Helper function to handle `np.datetime64` by calling the correct conversion functions. 
+ + Parameters: + x: The object that will be converted into an instance of a `pykx.TemporalFixedAtom`. + ktype: Desired `pykx.K` subclass (or type number) for the returned value. If `None`, the + type is inferred from `x`. + cast: Unused. + handle_nulls: Unused. + + Returns: + An instance of a subclass of `pykx.TemporalStampAtom`. + """ + + if isinstance(x, np.datetime64): + return from_numpy_datetime64(x, + ktype=ktype, + cast=cast, + handle_nulls=handle_nulls) + else: + return from_datetime_datetime(x, + ktype=ktype, + cast=cast, + handle_nulls=handle_nulls) + def from_slice(x: slice, ktype: Optional[KType] = None, *, @@ -2622,6 +2656,21 @@ def from_callable(x: Callable, return q('{.pykx.wrap[x][<]}', k.Foreign(x)) +def from_torch_tensor(x: pt.Tensor, + ktype: Optional[KType] = None, + *, + cast: bool = False, + handle_nulls: bool = False, + strings_as_char: bool = False, +) -> k.List: + if not beta_features: + raise QError('Conversions to PyTorch objects only supported as a beta feature') + if _torch_unavailable: + raise QError('PyTorch not available, please install PyTorch') + import torch + return toq(x.numpy()) + + cdef extern from 'include/foreign.h': uintptr_t py_to_pointer(object x) void py_destructor(core.K x) @@ -2727,7 +2776,7 @@ _converter_from_ktype = { k.FloatAtom: from_float, k.CharAtom: _from_str_like, k.SymbolAtom: _from_str_like, - k.TimestampAtom: from_datetime_datetime, + k.TimestampAtom: from_datetime, k.MonthAtom: from_datetime_datetime, k.DateAtom: from_datetime_date, @@ -2795,7 +2844,6 @@ _converter_from_python_type = { np.double: from_float, np.longdouble: from_float, np.float64: from_float, - np.float_: from_float, str: from_str, bytes: from_bytes, @@ -2827,6 +2875,7 @@ _converter_from_python_type = { pd.core.indexes.multi.MultiIndex: from_pandas_index, pd.core.indexes.category.CategoricalIndex: from_pandas_index, pd.Categorical: from_pandas_categorical, + } @@ -2839,7 +2888,6 @@ 
_converter_from_python_type[pd._libs.tslibs.timedeltas.Timedelta] = from_pandas_ class ToqModule(ModuleType): def __call__(self, x: Any, ktype: Optional[KType] = None, *, cast: bool = None, handle_nulls: bool = False, strings_as_char: bool = False) -> k.K: ktype = _resolve_k_type(ktype) - check_ktype = False try: check_ktype = ktype is not None \ @@ -2871,7 +2919,6 @@ class ToqModule(ModuleType): else: if type(x) in _converter_from_python_type: converter = _converter_from_python_type[type(x)] - elif isinstance(x, Path): converter = from_pathlib_path elif x is Ellipsis: @@ -2901,6 +2948,8 @@ class ToqModule(ModuleType): converter = from_bytes elif isinstance(x, k.PandasUUIDArray): converter = from_numpy_ndarray + elif "'torch.Tensor'" in str(type(x)): + converter = from_torch_tensor else: converter = _default_converter if type(ktype)==dict: diff --git a/src/pykx/util.py b/src/pykx/util.py index 86f4147..6b4ec33 100644 --- a/src/pykx/util.py +++ b/src/pykx/util.py @@ -44,6 +44,7 @@ 'classproperty', 'attr_as', 'cached_property', + 'class_or_instancemethod', 'debug_environment', 'df_from_arrays', 'get_default_args', @@ -106,6 +107,12 @@ def __get__(self, instance, owner=None): return value +class class_or_instancemethod(classmethod): + def __get__(self, instance, type_): + descr_get = super().__get__ if instance is None else self.__func__.__get__ + return descr_get(instance, type_) + + def slice_to_range(s: slice, n: int) -> range: """Converts a slice and collection size into a range whose indices match the slice. diff --git a/src/pykx/wrappers.py b/src/pykx/wrappers.py index 7a4ff87..d3bc490 100644 --- a/src/pykx/wrappers.py +++ b/src/pykx/wrappers.py @@ -164,6 +164,7 @@ from abc import ABCMeta from collections import abc +import copy from datetime import datetime, timedelta import importlib from inspect import signature @@ -179,14 +180,19 @@ import pandas as pd import pytz -from . import _wrappers, help +from . 
import _wrappers, beta_features, help from ._pyarrow import pyarrow as pa -from .config import k_gc, licensed, pandas_2, suppress_warnings +from .config import _check_beta, k_gc, licensed, pandas_2, suppress_warnings from .core import keval as _keval from .constants import INF_INT16, INF_INT32, INF_INT64, INF_NEG_INT16, INF_NEG_INT32, INF_NEG_INT64 from .constants import NULL_INT16, NULL_INT32, NULL_INT64 from .exceptions import LicenseException, PyArrowUnavailable, PyKXException, QError -from .util import cached_property, classproperty, detect_bad_columns, df_from_arrays, slice_to_range +from .util import cached_property, class_or_instancemethod, classproperty, detect_bad_columns, df_from_arrays, slice_to_range # noqa E501 + +import importlib.util +_torch_unavailable = importlib.util.find_spec('torch') is None +if not _torch_unavailable: + beta_features.append('PyTorch Conversions') q_initialized = False @@ -415,6 +421,9 @@ def __bool__(self): any = __bool__ all = __bool__ + def copy(self): + return copy.copy(self) + @property def is_atom(self): return True @@ -1293,6 +1302,15 @@ def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: boo class NumericAtom(Atom): """Base type for all q numeric atoms.""" + + def __new__(cls, x: Any, *args, cast: bool = None, **kwargs): + try: + if math.isinf(x): + return cls.inf if x>0 else -cls.inf + except BaseException: + pass + return toq(x, ktype=None if cls is K else cls, cast=cast) + def __int__(self): return int(self.py()) @@ -2286,6 +2304,20 @@ def grouped(self): else: raise e + def replace(self, to_replace, replace_with): + res = q(''' + {[l;s;r] + lT:type l; + rT:type r; + sOp:$[(rT>=0) or lT=0;~/:;=]; + rI:where sOp[s;l]; + if[0=count rI;:l]; + atF:$[(0=lT) or neg[lT]=rT;@[;;:;];{1_ @[(::),x;1+y;:;z]}]; + r:count[rI]#enlist r; + atF[l;rI;r] + }''', self, to_replace, replace_with) + return res + class List(Vector): """Wrapper for q lists, which are vectors of K objects of any type. 
@@ -2315,9 +2347,26 @@ def has_infs(self) -> bool: def py(self, *, raw: bool = False, has_nulls: Optional[bool] = None, stdlib: bool = True): return [_rich_convert(x, stdlib, raw) for x in self] - def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None, reshape: Union[bool, list] = False): # noqa: E501 """Provides a Numpy representation of the list.""" - return _wrappers.list_np(self, False, has_nulls, raw) + if reshape == False: # noqa: E712 + return _wrappers.list_np(self, False, has_nulls, raw) + if isinstance(reshape, bool): + dims = q("{$[0=t:type x;count[x],'distinct raze .z.s each x;enlist(count x;neg t)]}", self) # noqa: E501 + if len(dims) != 1: + raise TypeError('Data must be a singular type "rectangular" matrix') + dims = dims[0][:-1] + else: + dims = reshape + razed = q('(raze/)', self) + return razed.np().reshape(dims) + + def pt(self, *, reshape: Union[bool, list] = True): + _check_beta('PyTorch Conversion') + if _torch_unavailable: + raise QError('PyTorch not available, please install PyTorch') + import torch + return torch.from_numpy(self.np(reshape=reshape)) class NumericVector(Vector): @@ -2384,6 +2433,13 @@ def pd( res = pd.Series(arr, copy=False) return res + def pt(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + _check_beta('PyTorch Conversion') + if _torch_unavailable: + raise QError('PyTorch not available, please install PyTorch') + import torch + return torch.from_numpy(self.np(raw=raw, has_nulls=has_nulls)) + class BooleanVector(IntegralNumericVector): """Wrapper for q boolean vectors.""" @@ -2634,6 +2690,13 @@ def __rpow__(self, other, mod=None): def np(self, *, raw: bool = False, has_nulls: Optional[bool] = None): return _wrappers.k_vec_to_array(self, self._np_type) + def pt(self, *, raw: bool = False, has_nulls: Optional[bool] = None): + _check_beta('PyTorch Conversion') + if _torch_unavailable: + raise QError('PyTorch not available, 
please install PyTorch') + import torch + return torch.from_numpy(self.np(raw=raw, has_nulls=has_nulls)) + class RealVector(NonIntegralNumericVector): """Wrapper for q real (i.e. 32 bit float) vectors.""" @@ -5126,6 +5189,21 @@ def params(self): str(x) for x in q('k){x:.:x;$[min (x:x[1]) like "PyKXParam*"; `$9_\'$x; x]}', self) ) + def __new__(cls, x: Any, *args, cast: bool = None, **kwargs): + if isinstance(x, str): + x = q(x) + if not isinstance(x, Lambda): + raise TypeError("String passed is not in the correct lambda form") + return toq(x, ktype=None if cls is K else cls, cast=cast) + + @property + def string(self): + return q.string(self) + + @property + def value(self): + return q.value(self) + class UnaryPrimitive(Function): """Wrapper for q unary primitive functions, including `::`, and other built-ins. @@ -6937,11 +7015,13 @@ def exp(self, iterator=None): """ return self.call('exp', iterator=iterator) - @staticmethod - def fby(by, aggregate, data, by_table=False, data_table=False): + @class_or_instancemethod + def fby(int_or_class, by, aggregate, data, by_table=False, data_table=False): # noqa B902 """Helper function to create an `fby` inside a Column object Creates: `(fby;(enlist;aggregate;data);by)` `data_table` and `by_table` can be set to True to create Table ParseTree of their input""" + if not isinstance(int_or_class, type): + raise RuntimeError('Please use pykx.Column.fby() instead of running .fby() on a created Column object.') # noqa E501 if by_table or isinstance(by, (dict, Dictionary)): if isinstance(by, dict): name = list(by.keys())[0] diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..1377554 --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1 @@ +*.swp diff --git a/tests/conftest.py b/tests/conftest.py index 66c7d2f..8ac6ef7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -82,7 +82,7 @@ def random_free_port(): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(('localhost', 0)) 
port = s.getsockname()[1] - if port == 15001 or port == 15002 or port == 15003 or port == 15004: + if port in [15001, 15002, 15003, 15004, 15005]: return random_free_port() else: return port diff --git a/tests/main.q b/tests/main.q index 962f38c..5e552f7 100644 --- a/tests/main.q +++ b/tests/main.q @@ -1 +1,2 @@ +setenv[`PYKX_BETA_FEATURES;"True"] \l pykx.q diff --git a/tests/parse_tests.py b/tests/parse_tests.py index aae9d1b..98b68af 100644 --- a/tests/parse_tests.py +++ b/tests/parse_tests.py @@ -294,7 +294,8 @@ def make_tests(self): # noqa 'import subprocess', 'from packaging import version', 'import uuid', - 'import itertools' + 'import itertools', + 'import operator' ] diff --git a/tests/qcumber_tests/conversions.quke b/tests/qcumber_tests/conversions.quke index 359c0f3..dd66e5d 100644 --- a/tests/qcumber_tests/conversions.quke +++ b/tests/qcumber_tests/conversions.quke @@ -16,6 +16,8 @@ feature conversions .qu.compare[""; t .pykx.toraw .z.t]; expect raw datetime conversions to work .qu.compare[""; t .pykx.toraw 2006.07.21T09:13:39]; + expect pytorch type + .qu.compare[""; t .pykx.topt enlist til 10] should convert wrapped foreigns to correct types expect python type x:.pykx.eval"pykx.LongVector(list(range(10)))"; @@ -29,6 +31,9 @@ feature conversions expect pyarrow type x:.pykx.eval"pykx.LongVector(list(range(10)))"; .qu.compare[""; t .pykx.topa x]; + expect pytorch type + x:.pykx.eval"pykx.List([[10, 20, 30],[20, 30, 40]])"; + .qu.compare[""; t .pykx.topt x] expect K type x:.pykx.eval"pykx.LongVector(list(range(10)))"; .qu.compare[""; t .pykx.tok x]; diff --git a/tests/qcumber_tests/extra_functions.quke b/tests/qcumber_tests/extra_functions.quke index 3ca2b88..46c9411 100644 --- a/tests/qcumber_tests/extra_functions.quke +++ b/tests/qcumber_tests/extra_functions.quke @@ -39,6 +39,14 @@ feature .pykx.toq .qu.compare[-7h; type .pykx.toq .pykx.eval["1"]]; expect a short atom .qu.compare[-5h; type .pykx.toq .pykx.eval["pykx.ShortAtom(1)"]]; + should round trip 
if the item is a conversion object + expect round trips to be handled gracefully + all( + val~.pykx.toq .pykx.tonp val:til 10; + val~.pykx.toq .pykx.topy val:100?0Ng; + val~.pykx.toq .pykx.topd val:([]10?1f;10?1f); + val~.pykx.toq .pykx.tonp .pykx.topy val:1 2 3 + ) should round trip if the item is not a foreign/wrapper expect round trips to be handled gracefully for existing q data all( @@ -96,6 +104,8 @@ feature .pykx.loadPy @[{.pykx.loadPy x;0b};"file.t";{x like"File extension must be .py/.p"}] expect to error if loading .p file directly without loadPy @[{system"l ",x;0b};"tests/test_files/func1.p";{x like"SyntaxError*"}] + expect the name of the file in error message if file does not exist + @[{.pykx.loadPy x;0b};"file.py";{x like "*file.py*"}] should allow loading of various files expect loading of .py files to work using .pykx.loadPy @@ -110,3 +120,63 @@ feature .pykx.loadPy 99h~type .pykx.get[`dict1]`; 11~.pykx.get[`func1;<][10] ) + +feature .pykx.typepy + after + .pykx.setdefault["default"] + + should return correct datatype when conversion is set as default + expect a string to be returned + .qu.compare[10h;type .pykx.typepy 5] + expect numpy int + .qu.compare["";.pykx.typepy 5] + expect numpy array + .qu.compare["";.pykx.typepy til 10] + expect list + .qu.compare["";.pykx.typepy (10?1f;10?1f)] + expect pandas df + .qu.compare["";.pykx.typepy ([]100?1f;100?1f)] + + should return correct datatype when conversion is set as Python + expect a string to be returned + .pykx.setdefault["py"]; // Set conversion type to Python + .qu.compare[10h;type .pykx.typepy 5] + expect int + .qu.compare["";.pykx.typepy 5] + expect list + .qu.compare["";.pykx.typepy (10?1f;10?1f)] + expect dict + .qu.compare["";.pykx.typepy ([]100?1f;100?1f)] + + should return correct datatype when conversion is set as Pandas + expect a string to be returned + .pykx.setdefault["pd"]; // Set conversion type to Pandas + .qu.compare[10h;type .pykx.typepy 5] + expect numpy int + 
.qu.compare["";.pykx.typepy 5]; + expect pandas array + .qu.compare["";.pykx.typepy (10?1f;10?1f)] + expect pandas df + .qu.compare["";.pykx.typepy ([]100?1f;100?1f)] + + should return correct datatype when conversion is set as PyArrow + expect a string to be returned + .pykx.setdefault["pa"]; // Set conversion type to PyArrow + .qu.compare[10h;type .pykx.typepy 5] + expect numpy int + .qu.compare["";.pykx.typepy 5]; + expect pyarrow array + .qu.compare["";.pykx.typepy (10?1f;10?1f)] + expect pyarrow table + .qu.compare["";.pykx.typepy ([]100?1f;100?1f)] + + should return correct datatype when conversion is set as Pykx + expect a string to be returned + .pykx.setdefault["k"]; // Set conversion type to k + .qu.compare[10h;type .pykx.typepy 5] + expect pykx atom + .qu.compare["";.pykx.typepy 5]; + expect pykx vector + .qu.compare["";.pykx.typepy (10?1f;10?1f)] + expect pykx table + .qu.compare["";.pykx.typepy ([]100?1f;100?1f)] diff --git a/tests/qcumber_tests/pykx.quke b/tests/qcumber_tests/pykx.quke index 94d081a..c7c9e79 100644 --- a/tests/qcumber_tests/pykx.quke +++ b/tests/qcumber_tests/pykx.quke @@ -131,3 +131,14 @@ feature Qlog functions to not be defined in pykx namespace $[.z.o~`l64;@[{get x;1b};`.com_kx_log.setCorrelator;0b];1b] expect logging functionality to have loaded in the .i namespace $[.z.o~`l64;@[{get x;1b};`.com_kx_log.i.endpoint;0b];1b] + +feature toq conversions to support compositions + should pass through compositions + expect q composition + .qu.compare[any;.pykx.toq any] + expect q composition + .qu.compare[any;.pykx.toq0 any] + expect q composition + .qu.compare[any;.pykx.toq0[any;0b]] + expect q composition + .qu.compare[any;.pykx.toq0[any;1b]] \ No newline at end of file diff --git a/tests/qcumber_tests/reimport.quke b/tests/qcumber_tests/reimport.quke index 99e7c29..e8616ce 100644 --- a/tests/qcumber_tests/reimport.quke +++ b/tests/qcumber_tests/reimport.quke @@ -1,7 +1,17 @@ feature pykx.q + + after + .p.e:{.pykx.pyexec x} + should not 
error expect not error .qu.compare[(::); system"l pykx.q"] + should error + expect an error if embedPy is already loaded + .p.e:{"not the pykx version"}; + err:@[{system"l pykx.q"};`;{x}]; + .qu.compare["Unable to load PyKX, see logged output for more information";err]; + feature .pykx.safeReimport should function safely diff --git a/tests/qscripts/empty_line.q b/tests/qscripts/empty_line.q new file mode 100644 index 0000000..87216c3 --- /dev/null +++ b/tests/qscripts/empty_line.q @@ -0,0 +1,10 @@ +.test.testFunc:{[x;y] + + z: 1+1; + + k: 2+3; + + x+y + } + +.test.testFunc[1;100] diff --git a/tests/qscripts/pyfile.py b/tests/qscripts/pyfile.py new file mode 100644 index 0000000..8c905ca --- /dev/null +++ b/tests/qscripts/pyfile.py @@ -0,0 +1,2 @@ +def pyfunc(x, y): + return x*y diff --git a/tests/test_config.py b/tests/test_config.py index 53808ed..b4d0fb5 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -44,15 +44,22 @@ def test_boolean_config(): assert kx.config.pykx_qdebug +@pytest.mark.isolate +def test_valid_qlic(): + os.environ['QLIC'] = 'invalid' + with pytest.warns() as warnings: + import pykx as kx + assert len(warnings) == 1 + assert 'Configuration value QLIC set to non directory' in str(warnings[0].message) + assert 2 == kx.q('2').py() + + @pytest.mark.isolate def test_qargs_single(): os.environ['QARGS'] = '-p 5050' with pytest.warns() as warnings: import pykx as kx - if os.getenv('PYKX_THREADING', None) is None: - assert len(warnings) == 1 - else: - assert len(warnings) == 2 + assert len(warnings) == 1 assert 'setting a port in this way' in str(warnings[0].message) assert 2 == kx.q('2').py() @@ -62,10 +69,7 @@ def test_qargs_multi(): os.environ['QARGS'] = '-p 5050 -t 1000' with pytest.warns() as warnings: import pykx as kx - if os.getenv('PYKX_THREADING', None) is None: - assert len(warnings) == 2 - else: - assert len(warnings) == 3 + assert len(warnings) == 2 assert 'setting a port in this way' in str(warnings[0].message) assert 'setting 
timers in this way' in str(warnings[1].message) assert 2 == kx.q('2').py() diff --git a/tests/test_db.py b/tests/test_db.py index 33e7712..35019f5 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -19,6 +19,13 @@ def test_creation(kx): 'sym': kx.q('`a`b`b`c') }) db.create(tab, 't', 'date', by_field='sym', sym_enum='sym') + db.create(tab[['ti', 'p', 'sz', 'sym']].pd(), + 't', + kx.DateAtom(2015, 1, 3), + by_field='sym', + sym_enum='sym' + ) + assert 3 == len(db.t.select(kx.Column('date'))) assert db.tables == ['t'] @@ -193,11 +200,11 @@ def test_column_copy(kx): def test_column_apply(kx): db = kx.DB() db.load('db') - assert all([100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) + assert all([100, 200, 150, 210, 100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) db.apply_function('t', 'size', kx.q('2*')) - assert all([200, 400, 300, 420] == kx.q.qsql.select(db.t, 'size')['size']) + assert all([200, 400, 300, 420, 200, 400, 300, 420] == kx.q.qsql.select(db.t, 'size')['size']) db.apply_function('t', 'size', lambda x: x.np()/2) - assert all([100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) + assert all([100, 200, 150, 210, 100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) with pytest.raises(RuntimeError) as err: db.apply_function('t', 'size', 2) assert "Provided 'function' is not callable" in str(err.value) @@ -221,19 +228,20 @@ def test_db_fill(kx): 'col1': kx.random.random(1000, 10.0), 'col2': kx.random.random(1000, 10) }) - db.create(qtab, 'newtab', kx.q('2015.01.02')) + db.create(qtab, 'newtab', kx.q('2015.01.03')) with pytest.raises(kx.QError) as err: db.partition_count() assert '2015.01.01/newtab. 
OS reports: No such file or directory' in str(err.value) db.fill_database() parts = db.partition_count() - all(kx.q.qsql.exec(parts.values(), 'newtab') == [0, 1000]) + all(kx.q.qsql.exec(parts.values(), 'newtab') == [0, 0, 1000]) @pytest.mark.order(18) def test_load_warning(kx): kx.q('`:./db/2015.01.01/table/ set .Q.en[`:./db;]([] ti:09:30:00 09:31:00; p:101 102f; sz:100 200; sym:`a`b)') # noqa: E501 kx.q('`:./db/2015.01.02/table/ set .Q.en[`:./db;]([] ti:09:30:00 09:31:00; p:101.5 102.5; sz:150 210;sym:`b`c)') # noqa: E501 + kx.q('`:./db/2015.01.03/table/ set .Q.en[`:./db;]([] ti:09:30:00 09:31:00; p:101.5 102.5; sz:150 210;sym:`b`c)') # noqa: E501 db = kx.db.DB() assert db.tables is None with warnings.catch_warnings(record=True) as w: @@ -252,10 +260,10 @@ def test_compress(kx): 'col1': kx.random.random(1000, 10.0), 'col2': kx.random.random(1000, 10) }) - db.create(qtab, 'comptab', kx.q('2015.01.02'), compress=compress) + db.create(qtab, 'comptab', kx.q('2015.01.03'), compress=compress) db.fill_database() assert zd_cache == kx.q.z.zd - compress_info = kx.q('-21!key`:./2015.01.02/comptab/col1') + compress_info = kx.q('-21!key`:./2015.01.03/comptab/col1') assert type(compress_info) == kx.Dictionary assert compress_info['algorithm'].py() == 2 assert compress_info['zipLevel'].py() == 8 @@ -284,10 +292,12 @@ def test_partition_count(kx): db = kx.DB(path='db') fullview = db.partition_count() assert type(fullview) == kx.Dictionary # noqa: E721 - assert 2 == len(fullview) + assert 3 == len(fullview) + cache = kx.q.Q.pv subview = db.partition_count(subview=kx.q('2015.01.02')) assert type(subview) == kx.Dictionary # noqa: E721 assert 1 == len(subview) + assert all(cache == kx.q.Q.pv) def test_subview(kx): @@ -299,7 +309,7 @@ def test_subview(kx): db.subview() qtab = kx.q.qsql.select(db.trades) assert type(qtab) == kx.Table # noqa: E721 - assert 4 == len(qtab) + assert 8 == len(qtab) @pytest.mark.isolate @@ -347,7 +357,8 @@ def test_spaces_load(tmp_path): 'sz': kx.q('100 
200 150 210'), 'sym': kx.q('`a`b`b`c') }) - db.create(tab, 't', 'date', by_field='sym', sym_enum='sym') + # removal of by_field allows testing of 'dpfs' error fixed in 3.1 + db.create(tab, 't', 'date', sym_enum='sym') assert db.tables == ['t'] db.load(path=test_location, overwrite=True) assert db.tables == ['t'] diff --git a/tests/test_ipc.py b/tests/test_ipc.py index ef11358..4961d8f 100644 --- a/tests/test_ipc.py +++ b/tests/test_ipc.py @@ -151,15 +151,6 @@ def test_timeout(kx, q_port): assert [0, 1, 2, 3, 4] == q('til 5').py() -@pytest.mark.asyncio -@pytest.mark.unlicensed -async def test_async_timeout(kx, q_port): - async with kx.AsyncQConnection('localhost', q_port, timeout=2.0) as q: - with pytest.raises(kx.QError): - await q('{t:.z.p;while[.z.p 10: + assert call.cancelled() or call.cancelling() + with pytest.raises(asyncio.exceptions.InvalidStateError): call.result() with pytest.raises(kx.QError): await q('zzz') - with pytest.raises(kx.PyKXException): - call.get_loop() q_future = q('til 10') - with pytest.raises(kx.NoResults): + with pytest.raises(asyncio.exceptions.InvalidStateError): raise q_future.exception() q_future.cancel() - with pytest.raises(kx.FutureCancelled): + with pytest.raises(asyncio.exceptions.InvalidStateError): raise q_future.exception() with pytest.raises(kx.QError): q_future = q('zzz') diff --git a/tests/test_query.py b/tests/test_query.py index 89f879b..bcd6756 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -860,3 +860,12 @@ def test_column_licensed(kx): with pytest.raises(kx.LicenseException) as err: kx.Column('s') assert "kx.Column" in str(err) + + +def test_fby_instance_call(kx): + table = kx.q('([] x:`a`b`c;x1:1 2 3;x2:`a`e`g;x11:0 3 3;b:011b)') + assert kx.Column.fby(['c1,c2'], 'sum', table)._name == ['c1,c2'] + + with pytest.raises(RuntimeError) as err: + kx.Column('a').fby(['c1,c2'], 'sum', table) + assert "Column object" in str(err) diff --git a/tests/test_splay.py b/tests/test_splay.py new file mode 100644 index 
0000000..d2713d1 --- /dev/null +++ b/tests/test_splay.py @@ -0,0 +1,246 @@ +import os +import shutil + +# Do not import pykx here - use the `kx` fixture instead! +import pytest + + +@pytest.mark.order(1) +def test_creation(kx): + # Definition of qtab would break kx.DB prior to use of .Q.pt + kx.q('qtab:([]100?1f;100?1f)') + db = kx.DB(path='splay_db') + tab = kx.Table(data={ + 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), + 'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'), + 'p': kx.q('101 102 101.5 102.5'), + 'sz': kx.q('100 200 150 210'), + 'sym': kx.q('`a`b`b`c') + }) + db.create(tab, 't', format='splayed') + assert db.tables == ['t'] + + +@pytest.mark.order(2) +def test_create_errors(kx): + db = kx.DB(path='err_db') + tab = kx.Table(data={ + 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), + 'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'), + 'p': kx.q('101 102 101.5 102.5'), + 'sz': kx.q('100 200 150 210'), + 'sym': kx.q('`a`b`b`c') + }) + with pytest.raises(kx.QError) as err: + db.create(tab, 't', format='unsupported') + assert "'format' must be one of" in str(err.value) + + +@pytest.mark.order(3) +def test_load_1(kx): + db = kx.db.DB() + assert db.tables is None + db.load('splay_db') + assert db.tables == ['t'] + assert type(db.t) == kx.SplayedTable # noqa: E721 + with pytest.raises(kx.QError) as err: + db.load('../splay_db') + assert 'Attempting to reload existing' in str(err.value) + with pytest.raises(kx.QError) as err: + db.load('test') + assert 'Only one kdb+ database' in str(err.value) + with pytest.raises(kx.QError) as err: + db.load('../pyproject.toml', overwrite=True) + assert 'Provided path is a file' in str(err.value) + with pytest.raises(kx.QError) as err: + db.load('doesNotExist', overwrite=True) + assert 'Unable to find object at specified path' in str(err.value) + + +@pytest.mark.order(4) +def test_load_2(kx): + db = kx.DB(path='splay_db') + assert db.tables == ['t'] + assert type(db.t) == kx.SplayedTable # noqa: 
E721 + + +@pytest.mark.order(5) +def test_list(kx): + db = kx.DB() + db.load('splay_db') + print(db.tables) + db_cols = db.list_columns('t') + assert db_cols == ['date', 'ti', 'p', 'sz', 'sym'] + with pytest.raises(kx.QError) as err: + db.list_columns('no_tab') + assert 'Column listing not possible' in str(err.value) + + +@pytest.mark.order(6) +def test_column_add(kx): + db = kx.DB() + db.load('splay_db') + assert ['date', 'ti', 'p', 'sz', 'sym'] == db.list_columns('t') + db.add_column('t', 'vol', kx.IntAtom.null) + db_cols = db.list_columns('t') + assert ['date', 'ti', 'p', 'sz', 'sym', 'vol'] == db_cols + + +@pytest.mark.order(7) +def test_column_reorder(kx): + db = kx.DB() + db.load('splay_db') + db.reorder_columns('t', ['vol', 'sym', 'sz', 'p', 'ti', 'date']) + assert ['vol', 'sym', 'sz', 'p', 'ti', 'date'] == db.list_columns('t') + + +@pytest.mark.order(8) +def test_column_rename(kx): + db = kx.DB() + db.load('splay_db') + db.rename_column('t', 'p', 'price') + assert ['vol', 'sym', 'sz', 'price', 'ti', 'date'] == db.list_columns('t') + with pytest.raises(kx.QError) as err: + db.rename_column('t', 'no_col', 'upd') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(9) +def test_column_delete(kx): + db = kx.DB() + db.load('splay_db') + db.delete_column('t', 'vol') + assert ['sym', 'sz', 'price', 'ti', 'date'] == db.list_columns('t') + with pytest.raises(kx.QError) as err: + db.delete_column('t', 'no_col') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(10) +def test_column_find(kx): + db = kx.DB() + db.load('splay_db') + assert db.find_column('t', 'price') # noqa: E711 + assert not db.find_column('t', 'no_col') + + +@pytest.mark.order(11) +def test_column_set_attr(kx): + db = kx.DB() + db.load('splay_db') + assert 'g' not in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + db.set_column_attribute('t', 'sym', 'grouped') + assert 'g' in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + with 
pytest.raises(kx.QError) as err: + db.set_column_attribute('t', 'no_col', 'unique') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(12) +def test_column_clear_attr(kx): + db = kx.DB() + db.load('splay_db') + assert 'g' in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + db.clear_column_attribute('t', 'sym') + assert 'g' not in kx.q.qsql.exec(kx.q.meta(db.t), columns='a') + with pytest.raises(kx.QError) as err: + db.clear_column_attribute('t', 'no_col') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(13) +def test_column_set_type(kx): + db = kx.DB() + db.load('splay_db') + assert b'f' in kx.q.qsql.exec(kx.q.meta(db.t), columns='t').py() + db.set_column_type('t', 'price', kx.LongAtom) + assert b'f' not in kx.q.qsql.exec(kx.q.meta(db.t), columns='t').py() + with pytest.raises(kx.QError) as err: + db.set_column_type('t', 'price', kx.GUIDAtom) + assert "to type: " in str(err.value) + with pytest.raises(kx.QError) as err: + db.set_column_attribute('t', 'no_col', kx.GUIDAtom) + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(14) +def test_column_copy(kx): + db = kx.DB() + db.load('splay_db') + assert ['sym', 'sz', 'price', 'ti', 'date'] == db.list_columns('t') + db.copy_column('t', 'sz', 'size') + assert ['sym', 'sz', 'price', 'ti', 'date', 'size'] == db.list_columns('t') + assert all(kx.q.qsql.select(db.t, 'sz')['sz'] == kx.q.qsql.select(db.t, 'size')['size']) # noqa: E501 + with pytest.raises(kx.QError) as err: + db.copy_column('t', 'no_col', 'new_name') + assert "Specified column 'no_col'" in str(err.value) + + +@pytest.mark.order(15) +@pytest.mark.skipif( + os.getenv('PYKX_THREADING') is not None, + reason='Not supported with PYKX_THREADING' +) +def test_column_apply(kx): + db = kx.DB() + db.load('splay_db') + assert all([100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) + db.apply_function('t', 'size', kx.q('2*')) + assert all([200, 400, 300, 420] == 
kx.q.qsql.select(db.t, 'size')['size']) + db.apply_function('t', 'size', lambda x: x.np()/2) + assert all([100, 200, 150, 210] == kx.q.qsql.select(db.t, 'size')['size']) + with pytest.raises(RuntimeError) as err: + db.apply_function('t', 'size', 2) + assert "Provided 'function' is not callable" in str(err.value) + + +@pytest.mark.order(16) +def test_table_rename(kx): + db = kx.DB() + db.load('splay_db') + assert db.tables == ['t'] + db.rename_table('t', 'trades') + assert db.tables == ['trades'] + assert type(db.trades) == kx.SplayedTable # noqa: E721 + + +@pytest.mark.order(17) +def test_compress(kx): + zd_cache = kx.q.z.zd + compress = kx.Compress(kx.CompressionAlgorithm.gzip, level=8) + db = kx.DB(path='splay_db') + qtab = kx.Table(data={ + 'col1': kx.random.random(1000, 10.0), + 'col2': kx.random.random(1000, 10) + }) + db.create(qtab, 'comptab', format='splayed', compress=compress) + assert zd_cache == kx.q.z.zd + compress_info = kx.q('-21!`:./comptab/col1') + assert type(compress_info) == kx.Dictionary + assert compress_info['algorithm'].py() == 2 + assert compress_info['zipLevel'].py() == 8 + + +@pytest.mark.isolate +def test_spaces_load(tmp_path): + # prior to using util.loadfile the db.create/load would fail with nyi + test_location = tmp_path/'test directory/db' + import pykx as kx + db = kx.DB(path=test_location) + tab = kx.Table(data={ + 'date': kx.q('2015.01.01 2015.01.01 2015.01.02 2015.01.02'), + 'ti': kx.q('09:30:00 09:31:00 09:30:00 09:31:00'), + 'p': kx.q('101 102 101.5 102.5'), + 'sz': kx.q('100 200 150 210'), + 'sym': kx.q('`a`b`b`c') + }) + db.create(tab, 't', 'date', by_field='sym', sym_enum='sym') + assert db.tables == ['t'] + db.load(path=test_location, overwrite=True) + assert db.tables == ['t'] + + +@pytest.mark.order(-1) +def test_cleanup(kx): + shutil.rmtree('splay_db') + assert True diff --git a/tests/test_tick.py b/tests/test_tick.py index 5950f3e..50195a0 100644 --- a/tests/test_tick.py +++ b/tests/test_tick.py @@ -26,6 +26,9 @@ def 
test_tick_init(kx): assert tick('1b') assert tick('system"p"').py() == 5030 assert tick('.tick.tabs').py() == [] + with pytest.raises(kx.QError) as err: + tick.set_tables({'quote': kx.schema.builder({'px': kx.FloatAtom})}) + assert "'time' and 'sym' must be first" in str(err.value) tick.set_tables({'trade': trade_schema}) assert tick('.tick.tabs').py() == ['trade'] tick.stop() @@ -142,6 +145,8 @@ def test_rtp_vanilla(kx): rdb = kx.tick.RTP(port=5031) rdb.start({'tickerplant': 'localhost:5030'}) + rdb.set_tables({'px': trade_schema}) + assert isinstance(rdb('px'), kx.Table) assert isinstance(rdb('trades'), kx.Table) assert isinstance(rdb('quotes'), kx.Table) assert len(rdb('trades')) == 0 @@ -228,11 +233,17 @@ def test_rtp_timer(kx): reason='Not supported with PYKX_THREADING' ) def test_hdb_vanilla(kx): + trade_schema = kx.schema.builder({ + 'time': kx.TimespanAtom, + 'sym': kx.SymbolAtom, + 'px': kx.FloatAtom}) hdb = kx.tick.HDB(port=5032) assert hdb('1b') with pytest.raises(kx.QError) as err: hdb('custom_api', 5, 2) assert "custom_api" in str(err.value) + hdb.set_tables({'px': trade_schema}) + assert isinstance(hdb('px'), kx.Table) hdb.register_api('custom_api', custom_api) with pytest.raises(kx.QError) as err: hdb('custom_api', 5, 2) diff --git a/tests/test_toq.py b/tests/test_toq.py index 61c8213..ed7bcd2 100644 --- a/tests/test_toq.py +++ b/tests/test_toq.py @@ -1243,6 +1243,17 @@ def test_from_pandas_categorical(q, kx, pd): # no mutation of the initial symbol assert all(enum==q('series')) + # Test adding new value to existing enum + cat = pd.Series(['aaa', 'bbb', 'ccc', 'ddd'], dtype='category', name='index') + rez = kx.toq(cat) + assert isinstance(rez, kx.EnumVector) + assert isinstance(q('cat'), kx.SymbolVector) + assert isinstance(rez.pd(), pd.Series) + assert isinstance(rez.pd().values, pd.Categorical) + assert all(rez.pd() == cat) + # no mutation of original df + assert all((df.reset_index() == original_df)) + # Test that we don't overwrite an enum 
already existing on q side sym = q('sym:`aaa`bbb`ccc; a:`sym$10?sym; sym').py() assert(sym == ['aaa', 'bbb', 'ccc']) @@ -1251,21 +1262,6 @@ def test_from_pandas_categorical(q, kx, pd): sym = q('sym').py() assert(sym == ['aaa', 'bbb', 'ccc']) - # Cant re-enumerate with any new symbols - with pytest.raises(kx.QError, match="cast"): - df = pd.DataFrame() - cat = pd.Series(['foo', 'bar', 'baz'], dtype='category', name='cat') - df['series'] = cat - rez = kx.toq(df) - - # Cant re-enumerate with any new symbols - # enum would need extended manually via `?` operator - with pytest.raises(kx.QError, match="cast"): - df = pd.DataFrame() - cat = pd.Series(['aaa', 'bbb', 'ccc', 'ddd'], dtype='category', name='cat') - df['series'] = cat - rez = kx.toq(df) - assert kx.toq.ENUMS == ['enum0', 'cat', 'index', 'series', 'sym'] diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index 01b617d..559798c 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -122,6 +122,24 @@ def test_refcounting_q_vars(self, kx, q): q('![`c;();0b;enlist`a]') assert q('-16!a') == kx._wrappers.k_r(q('a'))== 1 + def test_copy(self, kx): + vec = kx.q.til(10) + as_vec = vec + cp_vec = vec.copy() + vec[3] = 20 + assert as_vec[3] == 20 + assert cp_vec[3] != 20 + assert vec._addr == as_vec._addr + assert vec._addr != cp_vec._addr + assert (cp_vec == kx.q.til(10)).all() + + tab = kx.q('([]100?1f;100?1f)') + as_tab = tab + cp_tab = tab.copy() + tab.select(where=kx.Column('x') > 0.5, inplace=True) + assert len(tab) == len(as_tab) + assert len(tab) != len(cp_tab) + def test_repr(self, q, kx): q.system.console_size = [25, 80] pykx = kx # noqa: F401 @@ -161,8 +179,11 @@ def test_str(self, q): assert str(q('enlist(::)')) == '::' assert str(q('til 4')) == '0 1 2 3' assert str(q('enlist each til 4')).replace('\r\n', '\n') == '0\n1\n2\n3' + assert str(q('\"\"')) == '' + assert str(q('`')) == '' assert str(q('::')) == '::' - assert str(q('()')) == '' + assert str(q('()')) == '()' + assert 
str(q('()!()')) == '()!()' @pytest.mark.unlicensed(unlicensed_only=True) @pytest.mark.skipif( @@ -928,6 +949,18 @@ def test_numeric_types(self, q, kx): assert isinstance(q('0x7b'), kx.IntegralNumericAtom) assert isinstance(q('1b '), kx.IntegralNumericAtom) + def test_numeric_inf(self, kx): + assert kx.ShortAtom(math.inf) == kx.q('0Wh') + assert kx.ShortAtom(-math.inf) == kx.q('-0Wh') + assert kx.IntAtom(math.inf) == kx.q('0Wi') + assert kx.IntAtom(-math.inf) == kx.q('-0Wi') + assert kx.LongAtom(math.inf) == kx.q('0W') + assert kx.LongAtom(-math.inf) == kx.q('-0W') + assert kx.RealAtom(math.inf) == kx.q('0we') + assert kx.RealAtom(-math.inf) == kx.q('-0we') + assert kx.FloatAtom(math.inf) == kx.q('0w') + assert kx.FloatAtom(-math.inf) == kx.q('-0w') + @pytest.mark.nep49 def test_pd(self, q, pd): assert q('0b').pd() is False @@ -1273,6 +1306,24 @@ def test_time(self, q, kx): assert time.np(raw=True) == 59789214 assert time.py(raw=True) == 59789214 + @pytest.mark.nep49 + def test_timestamp_from_datetime(self, kx, q): + time = np.datetime64('2025-01-27T14:34:21') + assert isinstance(kx.TimestampAtom(time), kx.TimestampAtom) + assert kx.TimestampAtom(time) == kx.TimestampAtom(kx.q('2025.01.27D14:34:21')) + + time_ns_precise = np.datetime64('2025-01-27T14:36:08.987654321', 'ns') + assert isinstance(kx.TimestampAtom(time_ns_precise), kx.TimestampAtom) + assert kx.TimestampAtom(time_ns_precise) == kx.TimestampAtom(kx.q('2025.01.27D14:36:08.987654321')) # noqa: E501 + + time_epoch = np.datetime64(0, 's') + assert isinstance(kx.TimestampAtom(time_epoch), kx.TimestampAtom) + assert kx.TimestampAtom(time_epoch) == kx.TimestampAtom(kx.q('1970.01.01D00:00:00')) + + time_last = np.datetime64('2262-04-11T23:47:16.854775', 'ns') + assert isinstance(kx.TimestampAtom(time_last), kx.TimestampAtom) + assert kx.TimestampAtom(time_last) == kx.TimestampAtom(kx.q('2262.04.11D23:47:16.854775')) # noqa: E501 + class Test_SymbolAtom: def test_str(self, q): @@ -1660,6 +1711,56 @@ def 
test_np_timestampvector_nulls_IPC(self, kx, q_port): r = conn('([] t:2#0Np)').py() assert pd.isna(r['t'][0]) + def test_vector_replace(self, kx, q): + v = q('1 2 3 4 4 3 2 1') + assert (v.replace(4, 10).count(10) == 2) + assert type(v.replace(4, 10)) == type(v) + assert all(v.replace(0, 101) == v) + assert ("List" in str(type(v.replace(1, 'a')))) + + v2 = q('1 1 1') + assert ("Float" in str(type(v2.replace(1, 2.3)))) + + v3 = q('()') + assert v3.replace(1, 101) == v3 + + @pytest.mark.isolate + def test_torch(self): + import os + os.environ['PYKX_BETA_FEATURES'] = 'True' + import pykx as kx + q = kx.q + import torch + lvec = q.til(10) + llst = q('5 5#25?10') + for i in [lvec, llst]: + assert isinstance(i.pt(), torch.Tensor) + assert i.pt().dtype == torch.int64 + + fvec = q('10?1f') + flst = q('5 5#25?1f') + for i in [fvec, flst]: + assert isinstance(i.pt(), torch.Tensor) + assert i.pt().dtype == torch.float64 + + for i in [llst, flst]: + assert (i.pt() == i.pt(reshape=[5, 5])).all() + + with pytest.raises(TypeError) as err: + q('(1 2;2 3f)').pt() + assert 'Data must be a singular type "rectangular" matrix' in str(err.value) + + with pytest.raises(AttributeError) as err: + q('"abc"').pt() + assert "'CharVector' object has no attribute 'pt'" in str(err.value) + + @pytest.mark.isolate + def test_torch_beta(self): + import pykx as kx + with pytest.raises(kx.QError) as err: + kx.q.til(10).pt() + assert 'Attempting to use a beta feature "PyTorch Con' in str(err.value) + class Test_List: v = '(0b;"G"$"00000000-0000-0000-0000-000000000001";0x02;3h;4i;5j;6e;7f)' @@ -1785,6 +1886,44 @@ def test_raw_conversions(self, q, kx): assert isinstance(q('{x[;3]}', qnest), kx.FloatVector) assert q('{x[;3]~"z"$y[;3]}', nestarr, qnest) + def test_list_replace(self, kx, q): + list1 = kx.q('("a";3;1.3;`b)') + assert (list1.replace(3, "junk").count("junk") == 1) + list2 = list1.replace(1.3, (1, 2, 3)) + assert all(list2[2] == kx.q('1 2 3')) + + list2 = kx.List(('a', 3, 'c')) + assert 
"SymbolVector" in str(type(list2.replace(3, 'b'))) + + vector_list = kx.q('(1 2 3;`a`b`c)') + assert vector_list.replace((1, 2, 3), "junk")[0] == "junk" + assert all(vector_list.replace(('a', 'b', 'c'), (1.1, 2.2, 3.3))[1] == (1.1, 2.2, 3.3)) + + def test_reshape(self, kx, q): + list1 = kx.q('5 4#20?1f') + assert list1.np().shape == (5,) + assert list1.np().dtype == np.dtype('O') + assert list1.np(reshape=True).shape == (5, 4) + assert list1.np(reshape=True).dtype == np.dtype('float64') + assert list1.np(reshape=[4, 5]).shape == (4, 5) + assert (list1.np(reshape=True) == list1.np(reshape=[5, 4])).all() + + list2 = kx.q('5 4#20?10') + assert list2.np().shape == (5,) + assert list2.np().dtype == np.dtype('O') + assert list2.np(reshape=True).shape == (5, 4) + assert list2.np(reshape=True).dtype == np.dtype('int64') + assert list2.np(reshape=[4, 5]).shape == (4, 5) + assert (list2.np(reshape=True) == list2.np(reshape=[5, 4])).all() + + with pytest.raises(TypeError) as err: + q('(1 2;2 3f)').np(reshape=True) + assert 'Data must be a singular type "rectangular" matrix' in str(err.value) + + with pytest.raises(TypeError) as err: + q('(1 2;2)').np(reshape=True) + assert 'Data must be a singular type "rectangular" matrix' in str(err.value) + # NaN is tricky to compare, so we generate GUID vectors until we get one whose complex form has no # NaNs in it. 
@@ -3767,6 +3906,35 @@ def test_symbolic_function(self, kx, q, q_port): f3() assert conn('testAlias') + def test_lambda_from_string(self, kx): + assert isinstance(kx.Lambda('{1+1}'), kx.Lambda) + with pytest.raises(TypeError) as e: + kx.Lambda('1+1') + assert "not in the correct lambda form" in str(e) + + def test_lambda_properties(self, kx): + saved_console_value = kx.q.system.console_size.py() + kx.q.system.console_size = [2000, 2000] + test_lambda = kx.q('''{a:1+1; + a:a+2.0000000000000000000000000000000000000000000000000000000000000000000; + a:a+2.0000000000000000000000000000000000000000000000000000000000000000000; + a:a+2; + a:a+2; + a:a+2; + a:a+2; + a:a+2; + a:a+2; + a:a+2; + a:a+2;}''') + prev_str_len = len(str(test_lambda)) + prev_property_len = len(test_lambda.string) + kx.q.system.console_size = [5, 5] + assert len(str(test_lambda)) != prev_str_len + assert len(test_lambda.string) == prev_property_len + kx.q.system.console_size = saved_console_value + + assert all(test_lambda.value[-1] == test_lambda.string) + def test_nulls(kx, q, pa):