From 7f2dd13a2f230bd80e5a11be3a630ea2da9948a3 Mon Sep 17 00:00:00 2001 From: asimos-bot Date: Sat, 2 May 2026 23:03:34 -0300 Subject: [PATCH 1/2] made main branch work with both cuda and cpu --- .gitignore | 3 +++ difflogic/difflogic.py | 4 +++- difflogic/packbitstensor.py | 5 ++-- experiments/main.py | 45 +++++++++++++++++++----------------- experiments/main_baseline.py | 29 ++++++++++++----------- setup.py | 28 ++++++++++++++++++---- 6 files changed, 71 insertions(+), 43 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e18a7e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +data-mnist/ +*env/ +**/__pycache__/ diff --git a/difflogic/difflogic.py b/difflogic/difflogic.py index bd2310c..e48f90c 100644 --- a/difflogic/difflogic.py +++ b/difflogic/difflogic.py @@ -1,5 +1,4 @@ import torch -import difflogic_cuda import numpy as np from .functional import bin_op_s, get_unique_connections, GradFactor from .packbitstensor import PackBitsTensor @@ -138,6 +137,7 @@ def forward_cuda_eval(self, x: PackBitsTensor): :param x: :return: """ + import difflogic_cuda assert not self.training assert isinstance(x, PackBitsTensor) assert x.t.shape[0] == self.in_dim, (x.t.shape, self.in_dim) @@ -205,11 +205,13 @@ def extra_repr(self): class LogicLayerCudaFunction(torch.autograd.Function): @staticmethod def forward(ctx, x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y): + import difflogic_cuda ctx.save_for_backward(x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y) return difflogic_cuda.forward(x, a, b, w) @staticmethod def backward(ctx, grad_y): + import difflogic_cuda x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y = ctx.saved_tensors grad_y = grad_y.contiguous() diff --git a/difflogic/packbitstensor.py b/difflogic/packbitstensor.py index bc1afd3..67f2f03 100644 --- a/difflogic/packbitstensor.py +++ b/difflogic/packbitstensor.py @@ -1,4 +1,3 @@ -import difflogic_cuda import torch 
import numpy as np @@ -13,12 +12,14 @@ def __init__(self, t: torch.BoolTensor, bit_count=32, device='cuda'): if device == 'cuda': t = t.to(device).T.contiguous() + import difflogic_cuda self.t, self.pad_len = difflogic_cuda.tensor_packbits_cuda(t, self.bit_count) else: raise NotImplementedError(device) def group_sum(self, k): assert self.device == 'cuda', self.device + import difflogic_cuda return difflogic_cuda.groupbitsum(self.t, self.pad_len, k) def flatten(self, start_dim=0, end_dim=-1, **kwargs): @@ -38,4 +39,4 @@ def _get_member_repr(self, member): return f"{' '.join(first_three)} {sep} {final}" def __repr__(self): - return '\n'.join([self._get_member_repr(item) for item in self.t]) \ No newline at end of file + return '\n'.join([self._get_member_repr(item) for item in self.t]) diff --git a/experiments/main.py b/experiments/main.py index b55bde2..b1a8ec8 100644 --- a/experiments/main.py +++ b/experiments/main.py @@ -7,6 +7,7 @@ import torch import torchvision from tqdm import tqdm +import sys from results_json import ResultsJSON @@ -128,13 +129,15 @@ def get_model(args): k = args.num_neurons l = args.num_layers + device = 'cuda' if args.implementation == 'cuda' else 'cpu' + #################################################################################################################### if arch == 'randomly_connected': logic_layers.append(torch.nn.Flatten()) - logic_layers.append(LogicLayer(in_dim=in_dim, out_dim=k, **llkw)) + logic_layers.append(LogicLayer(in_dim=in_dim, out_dim=k, device=device, **llkw)) for _ in range(l - 1): - logic_layers.append(LogicLayer(in_dim=k, out_dim=k, **llkw)) + logic_layers.append(LogicLayer(in_dim=k, out_dim=k, device=device, **llkw)) model = torch.nn.Sequential( *logic_layers, @@ -158,7 +161,7 @@ def get_model(args): 'total_num_weights': total_num_weights, }) - model = model.to('cuda') + model = model.to(device) print(model) if args.experiment_id is not None: @@ -168,7 +171,7 @@ def get_model(args): optimizer = 
torch.optim.Adam(model.parameters(), lr=args.learning_rate) - return model, loss_fn, optimizer + return model, loss_fn, optimizer, device def train(model, x, y, loss_fn, optimizer): @@ -181,13 +184,13 @@ def train(model, x, y, loss_fn, optimizer): return loss.item() -def eval(model, loader, mode): +def eval(model, loader, device, mode): orig_mode = model.training with torch.no_grad(): model.train(mode=mode) res = np.mean( [ - (model(x.to('cuda').round()).argmax(-1) == y.to('cuda')).to(torch.float32).mean().item() + (model(x.to(device).round()).argmax(-1) == y.to(device)).to(torch.float32).mean().item() for x, y in loader ] ) @@ -195,14 +198,14 @@ def eval(model, loader, mode): return res.item() -def packbits_eval(model, loader): +def packbits_eval(model, loader, device): orig_mode = model.training with torch.no_grad(): model.eval() res = np.mean( [ - (model(PackBitsTensor(x.to('cuda').reshape(x.shape[0], -1).round().bool())).argmax(-1) == y.to( - 'cuda')).to(torch.float32).mean().item() + (model(PackBitsTensor(x.to(device).reshape(x.shape[0], -1).round().bool())).argmax(-1) == y.to( + device)).to(torch.float32).mean().item() for x, y in loader ] ) @@ -272,7 +275,7 @@ def packbits_eval(model, loader): np.random.seed(args.seed) train_loader, validation_loader, test_loader = load_dataset(args) - model, loss_fn, optim = get_model(args) + model, loss_fn, optim, device = get_model(args) #################################################################################################################### @@ -283,23 +286,23 @@ def packbits_eval(model, loader): desc='iteration', total=args.num_iterations, ): - x = x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to('cuda') - y = y.to('cuda') + x = x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to(device) + y = y.to(device) loss = train(model, x, y, loss_fn, optim) if (i+1) % args.eval_freq == 0: if args.extensive_eval: - train_accuracy_train_mode = eval(model, train_loader, mode=True) - 
valid_accuracy_eval_mode = eval(model, validation_loader, mode=False) - valid_accuracy_train_mode = eval(model, validation_loader, mode=True) + train_accuracy_train_mode = eval(model, train_loader, device, mode=True) + valid_accuracy_eval_mode = eval(model, validation_loader, device, mode=False) + valid_accuracy_train_mode = eval(model, validation_loader, device, mode=True) else: train_accuracy_train_mode = -1 valid_accuracy_eval_mode = -1 valid_accuracy_train_mode = -1 - train_accuracy_eval_mode = eval(model, train_loader, mode=False) - test_accuracy_eval_mode = eval(model, test_loader, mode=False) - test_accuracy_train_mode = eval(model, test_loader, mode=True) + train_accuracy_eval_mode = eval(model, train_loader, device, mode=False) + test_accuracy_eval_mode = eval(model, test_loader, device, mode=False) + test_accuracy_train_mode = eval(model, test_loader, device, mode=True) r = { 'train_acc_eval_mode': train_accuracy_eval_mode, @@ -311,9 +314,9 @@ def packbits_eval(model, loader): } if args.packbits_eval: - r['train_acc_eval'] = packbits_eval(model, train_loader) - r['valid_acc_eval'] = packbits_eval(model, train_loader) - r['test_acc_eval'] = packbits_eval(model, test_loader) + r['train_acc_eval'] = packbits_eval(model, train_loader, device) + r['valid_acc_eval'] = packbits_eval(model, train_loader, device) + r['test_acc_eval'] = packbits_eval(model, test_loader, device) if args.experiment_id is not None: results.store_results(r) diff --git a/experiments/main_baseline.py b/experiments/main_baseline.py index 04c213f..c43548c 100644 --- a/experiments/main_baseline.py +++ b/experiments/main_baseline.py @@ -164,7 +164,8 @@ def count_parameters(model): 'total_num_weights': total_num_weights, }) - model = model.to('cuda') + device = 'cuda' if args.implementation == 'cuda' else 'cpu' + model = model.to(device) print(model) if args.experiment_id is not None: @@ -174,7 +175,7 @@ def count_parameters(model): optimizer = torch.optim.Adam(model.parameters(), 
lr=args.learning_rate) - return model, loss_fn, optimizer + return model, loss_fn, optimizer, device def train(model, x, y, loss_fn, optimizer): @@ -187,15 +188,15 @@ def train(model, x, y, loss_fn, optimizer): return loss.item() -def eval(model, loader, mode): +def eval(model, loader, device, mode): orig_mode = model.training with torch.no_grad(): model.train(mode=mode) res = np.mean( [ (model( - x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to('cuda') - ).argmax(-1) == y.to('cuda') + x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to(device) + ).argmax(-1) == y.to(device) ).to(torch.float32).mean().item() for x, y in loader ] @@ -259,7 +260,7 @@ def eval(model, loader, mode): np.random.seed(args.seed) train_loader, validation_loader, test_loader = load_dataset(args) - model, loss_fn, optim = get_model(args) + model, loss_fn, optim, device = get_model(args) #################################################################################################################### @@ -270,23 +271,23 @@ def eval(model, loader, mode): desc='iteration', total=args.num_iterations, ): - x = x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to('cuda') - y = y.to('cuda') + x = x.to(BITS_TO_TORCH_FLOATING_POINT_TYPE[args.training_bit_count]).to(device) + y = y.to(device) loss = train(model, x, y, loss_fn, optim) if (i+1) % args.eval_freq == 0: if args.extensive_eval: - train_accuracy_train_mode = eval(model, train_loader, mode=True) - valid_accuracy_eval_mode = eval(model, validation_loader, mode=False) - valid_accuracy_train_mode = eval(model, validation_loader, mode=True) + train_accuracy_train_mode = eval(model, train_loader, device, mode=True) + valid_accuracy_eval_mode = eval(model, validation_loader, device, mode=False) + valid_accuracy_train_mode = eval(model, validation_loader, device, mode=True) else: train_accuracy_train_mode = -1 valid_accuracy_eval_mode = -1 valid_accuracy_train_mode = -1 - train_accuracy_eval_mode = 
eval(model, train_loader, mode=False) - test_accuracy_eval_mode = eval(model, test_loader, mode=False) - test_accuracy_train_mode = eval(model, test_loader, mode=True) + train_accuracy_eval_mode = eval(model, train_loader, device, mode=False) + test_accuracy_eval_mode = eval(model, test_loader, device, mode=False) + test_accuracy_train_mode = eval(model, test_loader, device, mode=True) r = { 'train_acc_eval_mode': train_accuracy_eval_mode, diff --git a/setup.py b/setup.py index 89804c3..4a41def 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,26 @@ from setuptools import setup -from torch.utils.cpp_extension import BuildExtension, CUDAExtension +from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension +import torch +import os + + +def get_extensions(): + use_cuda = False + + if torch.cuda.is_available(): + use_cuda = True + + # Optional: also check CUDA_HOME / nvcc + from torch.utils.cpp_extension import CUDA_HOME + if CUDA_HOME is None: + use_cuda = False + + if use_cuda: + return [CUDAExtension('difflogic_cuda', [ + 'difflogic/cuda/difflogic.cpp', + 'difflogic/cuda/difflogic_kernel.cu', + ], extra_compile_args={'nvcc': ['-lineinfo']})] + return [] with open('README.md', 'r', encoding='utf-8') as fh: long_description = fh.read() @@ -25,10 +46,7 @@ ], package_dir={'difflogic': 'difflogic'}, packages=['difflogic'], - ext_modules=[CUDAExtension('difflogic_cuda', [ - 'difflogic/cuda/difflogic.cpp', - 'difflogic/cuda/difflogic_kernel.cu', - ], extra_compile_args={'nvcc': ['-lineinfo']})], + ext_modules=get_extensions(), cmdclass={'build_ext': BuildExtension}, python_requires='>=3.6', install_requires=[ From 50a02a8a8a3e5236ea751cf0c3287899286f0b37 Mon Sep 17 00:00:00 2001 From: asimos-bot Date: Wed, 6 May 2026 07:56:40 -0300 Subject: [PATCH 2/2] cleaner code for optional difflogic_cuda import --- difflogic/difflogic.py | 9 +++++---- difflogic/packbitstensor.py | 8 +++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git
a/difflogic/difflogic.py b/difflogic/difflogic.py index e48f90c..876dc9c 100644 --- a/difflogic/difflogic.py +++ b/difflogic/difflogic.py @@ -2,7 +2,11 @@ import numpy as np from .functional import bin_op_s, get_unique_connections, GradFactor from .packbitstensor import PackBitsTensor - +import warnings +try: + import difflogic_cuda +except ImportError: + warnings.warn('Couldn\'t import difflogic_cuda. The code will only run on CPU', ImportWarning) ######################################################################################################################## @@ -137,7 +141,6 @@ def forward_cuda_eval(self, x: PackBitsTensor): :param x: :return: """ - import difflogic_cuda assert not self.training assert isinstance(x, PackBitsTensor) assert x.t.shape[0] == self.in_dim, (x.t.shape, self.in_dim) @@ -205,13 +208,11 @@ def extra_repr(self): class LogicLayerCudaFunction(torch.autograd.Function): @staticmethod def forward(ctx, x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y): - import difflogic_cuda ctx.save_for_backward(x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y) return difflogic_cuda.forward(x, a, b, w) @staticmethod def backward(ctx, grad_y): - import difflogic_cuda x, a, b, w, given_x_indices_of_y_start, given_x_indices_of_y = ctx.saved_tensors grad_y = grad_y.contiguous() diff --git a/difflogic/packbitstensor.py b/difflogic/packbitstensor.py index 67f2f03..a7f2a09 100644 --- a/difflogic/packbitstensor.py +++ b/difflogic/packbitstensor.py @@ -1,6 +1,10 @@ import torch import numpy as np - +import warnings +try: + import difflogic_cuda +except ImportError: + warnings.warn('Couldn\'t import difflogic_cuda.
The code will only run on CPU', ImportWarning) class PackBitsTensor: def __init__(self, t: torch.BoolTensor, bit_count=32, device='cuda'): @@ -12,14 +16,12 @@ def __init__(self, t: torch.BoolTensor, bit_count=32, device='cuda'): if device == 'cuda': t = t.to(device).T.contiguous() - import difflogic_cuda self.t, self.pad_len = difflogic_cuda.tensor_packbits_cuda(t, self.bit_count) else: raise NotImplementedError(device) def group_sum(self, k): assert self.device == 'cuda', self.device - import difflogic_cuda return difflogic_cuda.groupbitsum(self.t, self.pad_len, k) def flatten(self, start_dim=0, end_dim=-1, **kwargs):