Skip to content

Commit 810929f

Browse files
committed
Add ASM (unsafe package, Windows only)
Add ExceptionHandler (test); export SIMDLowAVX512
1 parent eab61e0 commit 810929f

26 files changed

Lines changed: 20366 additions & 35741 deletions

.idea/PyFastUtil.iml

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cmake_minimum_required(VERSION 3.26)
22
project(PyFastUtil CXX C)
33

4-
set(CMAKE_CXX_STANDARD 23)
4+
set(CMAKE_CXX_STANDARD 20)
55

66
#add_custom_target(Build DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/timestamp DEPENDS ${SOURCE_FILES} DEPENDS pyfastutil)
77
#add_custom_command(

pyfastutil/src/Compat.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
#include "stdbool.h"
2727
#endif
2828

29+
#if defined(_WIN32) || defined(_WIN64) || defined(WIN32) || defined(WIN64) || defined(WINNT)
30+
#define WINDOWS
31+
#endif
32+
2933
#if PY_VERSION_HEX >= 0x030D0000 // 3.13
3034
#define IS_PYTHON_313_OR_LATER
3135
#endif

pyfastutil/src/PyFastUtil.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include "PyFastUtil.h"
66
#include "utils/simd/BitonicSort.h"
7+
#include "utils/ExceptionHandler.h"
78
#include "ints/IntArrayList.h"
89
#include "ints/IntArrayListIter.h"
910
#include "ints/BigIntArrayList.h"
@@ -17,6 +18,8 @@
1718
#include "objects/ObjectLinkedListIter.h"
1819
#include "unsafe/Unsafe.h"
1920
#include "unsafe/SIMD.h"
21+
#include "unsafe/SIMDLowAVX512.h"
22+
#include "unsafe/ASM.h"
2023

2124
static struct PyModuleDef pyfastutilModule = {
2225
PyModuleDef_HEAD_INIT,
@@ -30,7 +33,8 @@ static struct PyModuleDef pyfastutilModule = {
3033
#pragma ide diagnostic ignored "bugprone-reserved-identifier"
3134
#pragma ide diagnostic ignored "OCUnusedGlobalDeclarationInspection"
3235
PyMODINIT_FUNC PyInit___pyfastutil() {
33-
simd::init();
36+
initExceptionHandler();
37+
simd::initBitonicSort();
3438

3539
PyObject *parent = PyModule_Create(&pyfastutilModule);
3640
if (parent == nullptr)
@@ -51,6 +55,8 @@ PyMODINIT_FUNC PyInit___pyfastutil() {
5155

5256
PyModule_AddObject(parent, "Unsafe", PyInit_Unsafe());
5357
PyModule_AddObject(parent, "SIMD", PyInit_SIMD());
58+
PyModule_AddObject(parent, "SIMDLowAVX512", PyInit_SIMDLowAVX512());
59+
PyModule_AddObject(parent, "ASM", PyInit_ASM());
5460

5561
return parent;
5662
}

pyfastutil/src/unsafe/ASM.cpp

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
//
2+
// Created by xia__mc on 2024/12/10.
3+
//
4+
5+
#include "ASM.h"
6+
#include "utils/memory/AlignedAllocator.h"
7+
#include "utils/memory/FastMemcpy.h"
8+
#include "Compat.h"
9+
10+
#ifdef WINDOWS
11+
12+
#include "windows.h"
13+
14+
#endif
15+
16+
extern "C" {
17+
18+
// Static type object backing the Python-visible `ASM` class. Only the object
// header is set here; the remaining slots are assigned by initializeASMType()
// when PyInit_ASM() runs.
static PyTypeObject ASMType = {PyVarObject_HEAD_INIT(&PyType_Type, 0)};
21+
22+
static int ASM_init([[maybe_unused]] ASM *self,
23+
[[maybe_unused]] PyObject *args, [[maybe_unused]] PyObject *kwargs) {
24+
return 0;
25+
}
26+
27+
/**
 * tp_dealloc slot for ASM: hands the instance back to its type's tp_free.
 *
 * @param self instance being destroyed.
 */
static void ASM_dealloc(ASM *self) {
    PyTypeObject *const type = Py_TYPE(self);
    type->tp_free((PyObject *) self);
}
30+
31+
/**
 * `__enter__`: returns the ASM object itself as the context value.
 *
 * @param self the ASM instance.
 * @return a new strong reference to `self`.
 */
static PyObject *ASM_enter(PyObject *self, [[maybe_unused]] PyObject *args) {
    PyObject *const result = self;
    Py_INCREF(result);  // the caller receives its own reference
    return result;
}
35+
36+
/**
 * `__exit__`: does nothing and ignores all of its arguments.
 *
 * @return None (a falsy result, so an exception raised inside the `with`
 *         body is not suppressed).
 */
static PyObject *ASM_exit([[maybe_unused]] PyObject *self,
                          [[maybe_unused]] PyObject *const *args,
                          [[maybe_unused]] Py_ssize_t nargs) {
    Py_RETURN_NONE;
}
40+
41+
/**
 * `ASM.run(code: bytes) -> None`
 *
 * Executes the raw machine code held in a bytes object (Windows only).
 *
 * Steps:
 *  1. Validate that exactly one `bytes` argument was passed.
 *  2. If the bytes buffer is not 16-byte aligned, copy it into a buffer
 *     obtained from alignedAlloc(size, 16); otherwise the bytes object's own
 *     buffer is used in place.
 *  3. Mark the buffer PAGE_EXECUTE_READWRITE, flush the instruction cache,
 *     and call the buffer as a `void (*)()`.
 *  4. Restore the previous page protection and release the copy, if any.
 *
 * NOTE(review): VirtualProtect works at page granularity, so neighbouring data
 * sharing the page(s) is temporarily executable as well.
 *
 * @param args  args[0] must be a `bytes` object containing the machine code.
 * @param nargs must be 1.
 * @return None on success; nullptr with a Python exception set on failure:
 *         TypeError (wrong argument count or type), ValueError (buffer could
 *         not be obtained), MemoryError (aligned allocation failed), OSError
 *         (VirtualProtect failed), NotImplementedError (non-Windows build).
 */
static PyObject *ASM_run([[maybe_unused]] PyObject *__restrict self,
                         PyObject *const *__restrict args, Py_ssize_t nargs) noexcept {
#ifdef WINDOWS
    if (nargs != 1) {
        PyErr_SetString(PyExc_TypeError, "Function takes exactly 1 argument (__code)");
        return nullptr;
    }

    if (!PyBytes_Check(args[0])) {
        PyErr_SetString(PyExc_TypeError, "Expected a bytes object.");
        return nullptr;
    }

    const auto *__restrict code = (const unsigned char *) PyBytes_AsString(args[0]);
    if (code == nullptr) {
        PyErr_SetString(PyExc_ValueError, "Failed to convert argument to c str.");
        return nullptr;
    }

    const auto size = (size_t) PyBytes_Size(args[0]) * sizeof(unsigned char);

    // Run from a 16-byte aligned address: copy only when the bytes buffer is
    // not already aligned.
    unsigned char *__restrict target;
    const bool aligned = (uintptr_t) code % 16 == 0;
    if (!aligned) {
        try {
            // presumably alignedAlloc reports failure by throwing — confirm in AlignedAllocator.h
            target = (unsigned char *) alignedAlloc(size, 16);
        } catch (const std::exception &e) {
            PyErr_SetString(PyExc_MemoryError, e.what());
            return nullptr;
        }
        fast_memcpy(target, code, size);
    } else {
        target = (unsigned char *) code;
    }

    DWORD oldProtect;
    if (!VirtualProtect((LPVOID) target, size,
                        PAGE_EXECUTE_READWRITE, &oldProtect)) {
        PyErr_SetString(PyExc_OSError, "Failed to make ASM executable.");

        if (!aligned) {
            alignedFree(target);
        }
        return nullptr;
    }

    // Win32 requires an instruction-cache flush after code has been written
    // to memory and before it is executed; without it a CPU may run stale
    // instructions. Best effort: the result is deliberately not checked.
    (void) FlushInstructionCache(GetCurrentProcess(), (LPCVOID) target, size);

    ((void (*)()) target)();

    // Restore the original protection first, then release the copy on every
    // path so the cleanup lives in exactly one place.
    const bool restored = VirtualProtect((LPVOID) target, size, oldProtect, &oldProtect) != 0;

    if (!aligned) {
        alignedFree(target);
    }

    if (!restored) {
        PyErr_SetString(PyExc_OSError, "Failed to restore memory protection.");
        return nullptr;
    }

    Py_RETURN_NONE;
#else
    PyErr_SetString(PyExc_NotImplementedError, "ASM is not supported on this architecture.");
    return nullptr;
#endif
}
108+
109+
/**
 * `ASM.runFast(code: bytes) -> None`
 *
 * Unchecked variant of `run`: `nargs` is ignored and args[0] is accessed
 * through the PyBytes_AS_STRING / PyBytes_GET_SIZE macros with no type check.
 * The caller must therefore pass exactly one `bytes` object.
 *
 * The machine code is executed from a 16-byte aligned address. When the bytes
 * buffer is not aligned it is first copied into a buffer obtained from
 * alignedAlloc(size, 16); otherwise the bytes object's own buffer is made
 * executable in place. The previous page protection is restored afterwards.
 *
 * @param args args[0] is assumed to be a `bytes` object (not verified).
 * @return None on success; nullptr with a Python exception set on failure:
 *         MemoryError (aligned allocation failed), OSError (VirtualProtect
 *         failed), NotImplementedError (non-Windows build).
 */
static PyObject *ASM_runFast([[maybe_unused]] PyObject *__restrict self,
                             PyObject *const *__restrict args,
                             [[maybe_unused]] Py_ssize_t nargs) noexcept {
#ifdef WINDOWS
    const auto *code = (const unsigned char *) PyBytes_AS_STRING(args[0]);
    const auto size = (size_t) PyBytes_GET_SIZE(args[0]) * sizeof(unsigned char);

    // Only copy when the bytes buffer is not already 16-byte aligned.
    const bool needsCopy = (uintptr_t) code % 16 != 0;
    unsigned char *target = (unsigned char *) code;
    if (needsCopy) {
        try {
            // presumably alignedAlloc reports failure by throwing — confirm in AlignedAllocator.h
            target = (unsigned char *) alignedAlloc(size, 16);
        } catch (const std::exception &e) {
            PyErr_SetString(PyExc_MemoryError, e.what());
            return nullptr;
        }
        fast_memcpy(target, code, size);
    }

    DWORD oldProtect;
    if (!VirtualProtect((LPVOID) target, size,
                        PAGE_EXECUTE_READWRITE, &oldProtect)) {
        PyErr_SetString(PyExc_OSError, "Failed to make ASM executable.");
        if (needsCopy) {
            alignedFree(target);
        }
        return nullptr;
    }

    // Win32 requires an instruction-cache flush after code has been written
    // to memory and before it is executed. Best effort: result not checked.
    (void) FlushInstructionCache(GetCurrentProcess(), (LPCVOID) target, size);

    ((void (*)()) target)();

    const bool restored = VirtualProtect((LPVOID) target, size, oldProtect, &oldProtect) != 0;

    if (needsCopy) {
        alignedFree(target);
    }

    if (!restored) {
        PyErr_SetString(PyExc_OSError, "Failed to restore memory protection.");
        return nullptr;
    }

    Py_RETURN_NONE;
#else
    PyErr_SetString(PyExc_NotImplementedError, "ASM is not supported on this architecture.");
    return nullptr;
#endif
}
164+
165+
// Method table for the ASM type. `__enter__` takes no arguments; the other
// entries use the METH_FASTCALL convention (args array + count).
static PyMethodDef ASM_methods[] = {
        {"__enter__", reinterpret_cast<PyCFunction>(ASM_enter),   METH_NOARGS,   nullptr},
        {"__exit__",  reinterpret_cast<PyCFunction>(ASM_exit),    METH_FASTCALL, nullptr},
        {"run",       reinterpret_cast<PyCFunction>(ASM_run),     METH_FASTCALL, nullptr},
        {"runFast",   reinterpret_cast<PyCFunction>(ASM_runFast), METH_FASTCALL, nullptr},
        // sentinel terminating the table
        {nullptr,     nullptr,                                    0,             nullptr}
};
172+
173+
/**
 * Fills in the slots of the ASM type object: identity and layout, method
 * table, and the construction/destruction hooks.
 *
 * @param type the type object to populate (PyInit_ASM passes ASMType).
 */
void initializeASMType(PyTypeObject &type) {
    PyTypeObject *const t = &type;

    // Identity and memory layout.
    t->tp_name = "ASM";
    t->tp_basicsize = sizeof(ASM);
    t->tp_itemsize = 0;
    t->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE;

    // Python-visible methods.
    t->tp_methods = ASM_methods;

    // Object lifecycle: allocate -> new -> init ... dealloc -> free.
    t->tp_alloc = PyType_GenericAlloc;
    t->tp_new = PyType_GenericNew;
    t->tp_init = reinterpret_cast<initproc>(ASM_init);
    t->tp_dealloc = reinterpret_cast<destructor>(ASM_dealloc);
    t->tp_free = PyObject_Del;
}
185+
186+
// Module definition for the `__pyfastutil.ASM` submodule created by PyInit_ASM().
static struct PyModuleDef ASM_module = {
        PyModuleDef_HEAD_INIT,
        "__pyfastutil.ASM",              // m_name
        "Allow access to native asm.",   // m_doc
        -1,                              // m_size
        nullptr,                         // m_methods
        nullptr,                         // m_slots
        nullptr,                         // m_traverse
        nullptr,                         // m_clear
        nullptr                          // m_free
};
193+
194+
#pragma clang diagnostic push
195+
#pragma ide diagnostic ignored "OCUnusedGlobalDeclarationInspection"
196+
PyMODINIT_FUNC PyInit_ASM() {
197+
initializeASMType(ASMType);
198+
199+
PyObject *object = PyModule_Create(&ASM_module);
200+
if (object == nullptr)
201+
return nullptr;
202+
203+
Py_INCREF(&ASMType);
204+
if (PyModule_AddObject(object, "ASM", (PyObject *) &ASMType) < 0) {
205+
Py_DECREF(&ASMType);
206+
Py_DECREF(object);
207+
return nullptr;
208+
}
209+
210+
return object;
211+
}
212+
#pragma clang diagnostic pop
213+
214+
}

pyfastutil/src/unsafe/ASM.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//
2+
// Created by xia__mc on 2024/12/10.
3+
//
4+
5+
#ifndef PYFASTUTIL_ASM_H
6+
#define PYFASTUTIL_ASM_H
7+
8+
#include "utils/PythonPCH.h"
9+
10+
extern "C" {
11+
typedef struct ASM {
12+
PyObject_HEAD;
13+
} ASM;
14+
}
15+
16+
PyMODINIT_FUNC PyInit_ASM();
17+
18+
#endif //PYFASTUTIL_ASM_H

pyfastutil/src/unsafe/SIMD.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ static PyObject *SIMD_setAVX512Vector16([[maybe_unused]] PyObject *pySelf,
184184
}
185185

186186
static PyObject *SIMD_setAVX512Vector8([[maybe_unused]] PyObject *pySelf,
187-
PyObject *const *args, Py_ssize_t nargs) noexcept {
187+
PyObject *const *args, Py_ssize_t nargs) noexcept {
188188
#if !defined(__arm__) && !defined(__arm64__)
189189
if (nargs != 65) {
190190
PyErr_SetString(PyExc_TypeError, "Function takes exactly 65 arguments (__ptr, ...)");
@@ -260,7 +260,7 @@ static PyObject *SIMD_setAVX2Vector16([[maybe_unused]] PyObject *pySelf,
260260
}
261261

262262
static PyObject *SIMD_setAVX2Vector8([[maybe_unused]] PyObject *pySelf,
263-
PyObject *const *args, Py_ssize_t nargs) noexcept {
263+
PyObject *const *args, Py_ssize_t nargs) noexcept {
264264
#if !defined(__arm__) && !defined(__arm64__)
265265
if (nargs != 33) {
266266
PyErr_SetString(PyExc_TypeError, "Function takes exactly 33 arguments (__ptr, ...)");

pyfastutil/src/unsafe/Unsafe.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,9 +285,9 @@ static PyMethodDef Unsafe_methods[] = {
285285
{"incref", (PyCFunction) Unsafe_incref, METH_O, nullptr},
286286
{"decref", (PyCFunction) Unsafe_decref, METH_O, nullptr},
287287
{"refcnt", (PyCFunction) Unsafe_refcnt, METH_O, nullptr},
288-
{"fputs", (PyCFunction) Unsafe_fputs, METH_O, nullptr},
288+
{"fputs", (PyCFunction) Unsafe_fputs, METH_O, nullptr},
289289
{"fflush", (PyCFunction) Unsafe_fflush, METH_NOARGS, nullptr},
290-
{"fgets", (PyCFunction) Unsafe_fgets, METH_O, nullptr},
290+
{"fgets", (PyCFunction) Unsafe_fgets, METH_O, nullptr},
291291
{nullptr, nullptr, 0, nullptr}
292292
};
293293

0 commit comments

Comments
 (0)