Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 33 additions & 31 deletions tasks/black_scholes/gpu_baseline.cu
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ __global__ void getOutValOptionKernel(
outputVals[optionNum] = resultVal;
}

// ===== Host interface =====
// ===== Host interface (compute_only) =====

static int g_N = 0;
static int* d_types = nullptr;
Expand All @@ -269,23 +269,40 @@ static float* d_ts = nullptr;
static float* d_vols = nullptr;
static float* d_prices = nullptr;

extern "C" void solution_init(int N,
const int* types, const float* strikes, const float* spots,
const float* qs, const float* rs, const float* ts,
const float* vols)
extern "C" void solution_free(void)
{
if (d_types) { cudaFree(d_types); d_types = nullptr; }
if (d_strikes) { cudaFree(d_strikes); d_strikes = nullptr; }
if (d_spots) { cudaFree(d_spots); d_spots = nullptr; }
if (d_qs) { cudaFree(d_qs); d_qs = nullptr; }
if (d_rs) { cudaFree(d_rs); d_rs = nullptr; }
if (d_ts) { cudaFree(d_ts); d_ts = nullptr; }
if (d_vols) { cudaFree(d_vols); d_vols = nullptr; }
if (d_prices) { cudaFree(d_prices); d_prices = nullptr; }
g_N = 0;
}

extern "C" void solution_compute(int N,
const int* types, const float* strikes, const float* spots,
const float* qs, const float* rs, const float* ts,
const float* vols,
float* prices)
{
g_N = N;
size_t szi = (size_t)N * sizeof(int);
size_t szf = (size_t)N * sizeof(float);

cudaMalloc(&d_types, szi);
cudaMalloc(&d_strikes, szf);
cudaMalloc(&d_spots, szf);
cudaMalloc(&d_qs, szf);
cudaMalloc(&d_rs, szf);
cudaMalloc(&d_ts, szf);
cudaMalloc(&d_vols, szf);
cudaMalloc(&d_prices, szf);
if (g_N != N) {
solution_free();
cudaMalloc(&d_types, szi);
cudaMalloc(&d_strikes, szf);
cudaMalloc(&d_spots, szf);
cudaMalloc(&d_qs, szf);
cudaMalloc(&d_rs, szf);
cudaMalloc(&d_ts, szf);
cudaMalloc(&d_vols, szf);
cudaMalloc(&d_prices, szf);
g_N = N;
}

cudaMemcpy(d_types, types, szi, cudaMemcpyHostToDevice);
cudaMemcpy(d_strikes, strikes, szf, cudaMemcpyHostToDevice);
Expand All @@ -294,27 +311,12 @@ extern "C" void solution_init(int N,
cudaMemcpy(d_rs, rs, szf, cudaMemcpyHostToDevice);
cudaMemcpy(d_ts, ts, szf, cudaMemcpyHostToDevice);
cudaMemcpy(d_vols, vols, szf, cudaMemcpyHostToDevice);
}

extern "C" void solution_compute(int N, float* prices)
{
int threadsPerBlock = 256;
int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;

getOutValOptionKernel<<<blocks, threadsPerBlock>>>(
N, d_types, d_strikes, d_spots, d_qs, d_rs, d_ts, d_vols, d_prices);

cudaMemcpy(prices, d_prices, (size_t)N * sizeof(float), cudaMemcpyDeviceToHost);
}

extern "C" void solution_free(void)
{
if (d_types) { cudaFree(d_types); d_types = nullptr; }
if (d_strikes) { cudaFree(d_strikes); d_strikes = nullptr; }
if (d_spots) { cudaFree(d_spots); d_spots = nullptr; }
if (d_qs) { cudaFree(d_qs); d_qs = nullptr; }
if (d_rs) { cudaFree(d_rs); d_rs = nullptr; }
if (d_ts) { cudaFree(d_ts); d_ts = nullptr; }
if (d_vols) { cudaFree(d_vols); d_vols = nullptr; }
if (d_prices) { cudaFree(d_prices); d_prices = nullptr; }
cudaMemcpy(prices, d_prices, szf, cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
}
101 changes: 42 additions & 59 deletions tasks/bonds_pricing/gpu_baseline.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1072,39 +1072,47 @@ static bondsDateStruct intializeDateKernelCpu(int d, int m, int y)

extern "C" {

void solution_init(int N,
const int* issue_year, const int* issue_month, const int* issue_day,
const int* maturity_year, const int* maturity_month, const int* maturity_day,
const float* rates, float coupon_freq)
void solution_free(void)
{
g_N = N;
g_issue_year = issue_year;
g_issue_month = issue_month;
g_issue_day = issue_day;
g_maturity_year = maturity_year;
g_maturity_month = maturity_month;
g_maturity_day = maturity_day;
g_rates = rates;
g_coupon_freq = coupon_freq;

// Allocate persistent GPU memory for inArgs
cudaMalloc(&d_discountCurve, N * sizeof(bondsYieldTermStruct));
cudaMalloc(&d_repoCurve, N * sizeof(bondsYieldTermStruct));
cudaMalloc(&d_currDate, N * sizeof(bondsDateStruct));
cudaMalloc(&d_maturityDate, N * sizeof(bondsDateStruct));
cudaMalloc(&d_bondCleanPrice, N * sizeof(dataType));
cudaMalloc(&d_bond, N * sizeof(bondStruct));
cudaMalloc(&d_dummyStrike, N * sizeof(dataType));

// Allocate persistent GPU memory for results
cudaMalloc(&d_dirtyPrice, N * sizeof(dataType));
cudaMalloc(&d_accruedAmountCurrDate, N * sizeof(dataType));
cudaMalloc(&d_cleanPrice, N * sizeof(dataType));
cudaMalloc(&d_bondForwardVal, N * sizeof(dataType));
if (d_discountCurve) { cudaFree(d_discountCurve); d_discountCurve = NULL; }
if (d_repoCurve) { cudaFree(d_repoCurve); d_repoCurve = NULL; }
if (d_currDate) { cudaFree(d_currDate); d_currDate = NULL; }
if (d_maturityDate) { cudaFree(d_maturityDate); d_maturityDate = NULL; }
if (d_bondCleanPrice) { cudaFree(d_bondCleanPrice); d_bondCleanPrice = NULL; }
if (d_bond) { cudaFree(d_bond); d_bond = NULL; }
if (d_dummyStrike) { cudaFree(d_dummyStrike); d_dummyStrike = NULL; }
if (d_dirtyPrice) { cudaFree(d_dirtyPrice); d_dirtyPrice = NULL; }
if (d_accruedAmountCurrDate) { cudaFree(d_accruedAmountCurrDate); d_accruedAmountCurrDate = NULL; }
if (d_cleanPrice) { cudaFree(d_cleanPrice); d_cleanPrice = NULL; }
if (d_bondForwardVal) { cudaFree(d_bondForwardVal); d_bondForwardVal = NULL; }
g_N = 0;
}

void solution_compute(int N, float* prices)
void solution_compute(int N,
const int* issue_year, const int* issue_month, const int* issue_day,
const int* maturity_year, const int* maturity_month, const int* maturity_day,
const float* rates, float coupon_freq,
float* prices)
{
if (g_N != N) {
solution_free();
// Allocate persistent GPU memory for inArgs
cudaMalloc(&d_discountCurve, N * sizeof(bondsYieldTermStruct));
cudaMalloc(&d_repoCurve, N * sizeof(bondsYieldTermStruct));
cudaMalloc(&d_currDate, N * sizeof(bondsDateStruct));
cudaMalloc(&d_maturityDate, N * sizeof(bondsDateStruct));
cudaMalloc(&d_bondCleanPrice, N * sizeof(dataType));
cudaMalloc(&d_bond, N * sizeof(bondStruct));
cudaMalloc(&d_dummyStrike, N * sizeof(dataType));

// Allocate persistent GPU memory for results
cudaMalloc(&d_dirtyPrice, N * sizeof(dataType));
cudaMalloc(&d_accruedAmountCurrDate, N * sizeof(dataType));
cudaMalloc(&d_cleanPrice, N * sizeof(dataType));
cudaMalloc(&d_bondForwardVal, N * sizeof(dataType));
g_N = N;
}

// Build inArgs on host (matching bondsEngine.c / cpu_reference.c setup)
bondsYieldTermStruct* h_discountCurve = (bondsYieldTermStruct*)malloc(N * sizeof(bondsYieldTermStruct));
bondsYieldTermStruct* h_repoCurve = (bondsYieldTermStruct*)malloc(N * sizeof(bondsYieldTermStruct));
Expand All @@ -1120,16 +1128,16 @@ void solution_compute(int N, float* prices)
int repoCompounding = SIMPLE_INTEREST;
dataType repoCompoundFreq = 1;

bondsDateStruct bondIssueDate = intializeDateKernelCpu(g_issue_day[numBond], g_issue_month[numBond], g_issue_year[numBond]);
bondsDateStruct bondMaturityDate = intializeDateKernelCpu(g_maturity_day[numBond], g_maturity_month[numBond], g_maturity_year[numBond]);
bondsDateStruct bondIssueDate = intializeDateKernelCpu(issue_day[numBond], issue_month[numBond], issue_year[numBond]);
bondsDateStruct bondMaturityDate = intializeDateKernelCpu(maturity_day[numBond], maturity_month[numBond], maturity_year[numBond]);
bondsDateStruct todaysDate = intializeDateKernelCpu(bondMaturityDate.day-1, bondMaturityDate.month, bondMaturityDate.year);

bondStruct bond;
bond.startDate = bondIssueDate;
bond.maturityDate = bondMaturityDate;
bond.rate = g_rates[numBond];
bond.rate = rates[numBond];

dataType bondCouponFrequency = (dataType)g_coupon_freq;
dataType bondCouponFrequency = (dataType)coupon_freq;
dataType bondCleanPrice = 89.97693786;

bondsYieldTermStruct bondCurve;
Expand Down Expand Up @@ -1221,33 +1229,8 @@ void solution_compute(int N, float* prices)
free(h_accruedAmountCurrDate);
free(h_cleanPrice);
free(h_bondForwardVal);
}

void solution_free(void)
{
cudaFree(d_discountCurve);
cudaFree(d_repoCurve);
cudaFree(d_currDate);
cudaFree(d_maturityDate);
cudaFree(d_bondCleanPrice);
cudaFree(d_bond);
cudaFree(d_dummyStrike);
cudaFree(d_dirtyPrice);
cudaFree(d_accruedAmountCurrDate);
cudaFree(d_cleanPrice);
cudaFree(d_bondForwardVal);

d_discountCurve = NULL;
d_repoCurve = NULL;
d_currDate = NULL;
d_maturityDate = NULL;
d_bondCleanPrice = NULL;
d_bond = NULL;
d_dummyStrike = NULL;
d_dirtyPrice = NULL;
d_accruedAmountCurrDate = NULL;
d_cleanPrice = NULL;
d_bondForwardVal = NULL;
cudaDeviceSynchronize();
}

} // extern "C"
57 changes: 24 additions & 33 deletions tasks/dbscan/gpu_baseline.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@

// Module-level state
static int g_N = 0;
static float g_eps = 0;
static int g_minPts = 0;
static float* d_xs = NULL;
static float* d_ys = NULL;
static int* d_neighbor_counts = NULL;
Expand Down Expand Up @@ -59,24 +57,29 @@ __global__ void countNeighborsKernel(

// ===== Interface =====

extern "C" void solution_init(int N, const float* xs, const float* ys,
float eps, int minPts)
extern "C" void solution_free(void)
{
g_N = N;
g_eps = eps;
g_minPts = minPts;
if (d_xs) { cudaFree(d_xs); d_xs = NULL; }
if (d_ys) { cudaFree(d_ys); d_ys = NULL; }
if (d_neighbor_counts) { cudaFree(d_neighbor_counts); d_neighbor_counts = NULL; }
g_N = 0;
}

cudaMalloc(&d_xs, N * sizeof(float));
cudaMalloc(&d_ys, N * sizeof(float));
cudaMalloc(&d_neighbor_counts, N * sizeof(int));
extern "C" void solution_compute(int N, const float* xs, const float* ys,
float eps, int minPts, int* labels)
{
if (g_N != N) {
solution_free();
cudaMalloc(&d_xs, N * sizeof(float));
cudaMalloc(&d_ys, N * sizeof(float));
cudaMalloc(&d_neighbor_counts, N * sizeof(int));
g_N = N;
}

cudaMemcpy(d_xs, xs, N * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_ys, ys, N * sizeof(float), cudaMemcpyHostToDevice);
}

extern "C" void solution_compute(int N, int* labels)
{
float eps2 = g_eps * g_eps;
float eps2 = eps * eps;
int threads = 256;
int blocks = (N + threads - 1) / threads;

Expand All @@ -88,11 +91,6 @@ extern "C" void solution_compute(int N, int* labels)
int* h_counts = (int*)malloc(N * sizeof(int));
cudaMemcpy(h_counts, d_neighbor_counts, N * sizeof(int), cudaMemcpyDeviceToHost);

float* h_xs = (float*)malloc(N * sizeof(float));
float* h_ys = (float*)malloc(N * sizeof(float));
cudaMemcpy(h_xs, d_xs, N * sizeof(float), cudaMemcpyDeviceToHost);
cudaMemcpy(h_ys, d_ys, N * sizeof(float), cudaMemcpyDeviceToHost);

// DBSCAN BFS (matches clusterThread + expandCluster from original)
for (int i = 0; i < N; i++) labels[i] = UNPROCESSED;

Expand All @@ -102,7 +100,7 @@ extern "C" void solution_compute(int N, int* labels)
for (int i = 0; i < N; i++) {
if (labels[i] != UNPROCESSED) continue;

if (h_counts[i] < g_minPts) {
if (h_counts[i] < minPts) {
labels[i] = NOISE;
continue;
}
Expand All @@ -116,8 +114,8 @@ extern "C" void solution_compute(int N, int* labels)
// Add neighbors of i as seeds
for (int j = 0; j < N; j++) {
if (j == i) continue;
float dx = h_xs[i] - h_xs[j];
float dy = h_ys[i] - h_ys[j];
float dx = xs[i] - xs[j];
float dy = ys[i] - ys[j];
if (dx*dx + dy*dy <= eps2) {
if (labels[j] == UNPROCESSED) seeds[tail++] = j;
if (labels[j] == UNPROCESSED || labels[j] == NOISE)
Expand All @@ -128,12 +126,12 @@ extern "C" void solution_compute(int N, int* labels)
// BFS expand
while (head < tail) {
int q = seeds[head++];
if (h_counts[q] < g_minPts) continue;
if (h_counts[q] < minPts) continue;

for (int j = 0; j < N; j++) {
if (labels[j] != UNPROCESSED && labels[j] != NOISE) continue;
float dx = h_xs[q] - h_xs[j];
float dy = h_ys[q] - h_ys[j];
float dx = xs[q] - xs[j];
float dy = ys[q] - ys[j];
if (dx*dx + dy*dy <= eps2) {
if (labels[j] == UNPROCESSED) seeds[tail++] = j;
labels[j] = clusterId;
Expand All @@ -144,13 +142,6 @@ extern "C" void solution_compute(int N, int* labels)

free(seeds);
free(h_counts);
free(h_xs);
free(h_ys);
}

extern "C" void solution_free(void)
{
if (d_xs) { cudaFree(d_xs); d_xs = NULL; }
if (d_ys) { cudaFree(d_ys); d_ys = NULL; }
if (d_neighbor_counts) { cudaFree(d_neighbor_counts); d_neighbor_counts = NULL; }
cudaDeviceSynchronize();
}
Loading