From f3e85e25baac5c324057bd2ef66b60d9685664af Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 20 Mar 2024 19:35:28 -0400 Subject: [PATCH 01/28] Add khop demo --- include/khop.h | 36 +++++++++++++ src/sampling/random_walk/Makefile | 10 ++++ src/sampling/random_walk/khop_demo.cc | 74 +++++++++++++++++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 include/khop.h create mode 100644 src/sampling/random_walk/Makefile create mode 100644 src/sampling/random_walk/khop_demo.cc diff --git a/include/khop.h b/include/khop.h new file mode 100644 index 0000000..56e1367 --- /dev/null +++ b/include/khop.h @@ -0,0 +1,36 @@ +// Copyright 2024 MIT +// Authors: Luc Gaitskell +#pragma once +#include "utils.h" +#include "graph.h" +#include +std::mt19937 gen(time(nullptr)); +// std::default_random_engine generator; +// std::uniform_real_distribution distribution(0.0,1.0); + +inline vidType sample_next_vbyte(Graph &g, vidType transit) +{ + auto adj_transit = g.N_vbyte(transit, "streamvbyte"); + vidType src_degree = adj_transit.size(); + if (src_degree == 0) + { + return (numeric_limits::max)(); + } + int idx = gen() % src_degree; + return adj_transit.data()[idx]; +} + +inline vidType sample_next(Graph &g, vidType transit, vidType src_degree, int step) +{ + if (transit == (numeric_limits::max)()) + { + return (numeric_limits::max)(); + } + if (src_degree == 0) + { + return (numeric_limits::max)(); + } + int idx = gen() % src_degree; + // int idx = 1; + return g.N(transit, idx); +} diff --git a/src/sampling/random_walk/Makefile b/src/sampling/random_walk/Makefile new file mode 100644 index 0000000..db157ca --- /dev/null +++ b/src/sampling/random_walk/Makefile @@ -0,0 +1,10 @@ +include ../../common.mk +#OBJS += verifier.o +all: $(OBJS) khop_demo + +khop_demo: khop_demo.o $(OBJS) + $(CXX) $(CXXFLAGS) $(INCLUDES) khop_demo.o $(OBJS) -o $@ -lgomp + mv $@ $(BIN) + +clean: + rm *.o diff --git a/src/sampling/random_walk/khop_demo.cc b/src/sampling/random_walk/khop_demo.cc 
new file mode 100644 index 0000000..dd548fb --- /dev/null +++ b/src/sampling/random_walk/khop_demo.cc @@ -0,0 +1,74 @@ +#include "graph.h" +#include "compressor.hh" +#include "khop.h" + +void kHopSolver(Graph &g, int n_samples, int n_threads) +{ + vector inits = get_initial_transits(sample_size(-1) * n_samples, g.V()); + int step_count = sample_size(-1) * n_samples; + int total_count = step_count; + for (int step = 0; step < steps(); step++) + { + step_count *= sample_size(step); + total_count += step_count; + } + std::vector transits(total_count, 0); + for (int i = 0; i < inits.size(); i++) + { + transits[i] = inits[i]; + } + std::cout << "...initialized starting transits..." << std::endl; + + Timer t; + t.Start(); + // sample for defined number of steps + step_count = sample_size(-1) * n_samples; + int prev_step_count = n_samples; + int t_begin = 0; + int old_t_begin = 0; + for (int step = 0; step < steps(); step++) + { + std::cout << "STEP " << step << std::endl; + t_begin += step_count; + step_count *= sample_size(step); + prev_step_count *= sample_size(step - 1); + // sample every new transit in the step for every sample group + for (int idx = 0; idx < step_count; idx++) + { + int t_idx = t_begin + idx; + int old_t_idx = old_t_begin + idx / sample_size(step); + vidType old_t = transits[old_t_idx]; + if (old_t == (numeric_limits::max)()) + { + transits[t_idx] = (numeric_limits::max)(); + continue; + } + vidType new_t = sample_next_vbyte(g, old_t); + transits[t_idx] = new_t; + } + old_t_begin += prev_step_count; + } + t.Stop(); + std::cout << "result size: " << step_count + t_begin << std::endl; + std::cout << "Finished sampling in " << t.Seconds() << " sec" << std::endl; +} + +int main(int argc, char *argv[]) +{ + Graph g; + std::string in_prefix = argv[1]; + std::string out_prefix = argv[2]; + std::string scheme = "streamvbyte"; + bool permutated = false; + // save_compressed_graph(in_prefix, out_prefix); + g.load_compressed_graph(out_prefix, scheme, 
permutated); + g.print_meta_data(); + std::cout << "LOADED COMPRESSED GRAPH\n" + << std::endl; + + std::cout << "Begin sampling compressed graph..." << std::endl; + int n_samples = argc >= 4 ? atoi(argv[3]) : num_samples(); + int n_threads = argc >= 5 ? atoi(argv[4]) : 1; + kHopSolver(g, n_samples, n_threads); + return 0; +} From e680af939a8d0f501dd9c27270e7c60ac8e63e09 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 20 Mar 2024 19:39:45 -0400 Subject: [PATCH 02/28] Move to root --- src/{sampling/random_walk => sampling_random_walk}/Makefile | 2 +- src/{sampling/random_walk => sampling_random_walk}/khop_demo.cc | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/{sampling/random_walk => sampling_random_walk}/Makefile (87%) rename src/{sampling/random_walk => sampling_random_walk}/khop_demo.cc (100%) diff --git a/src/sampling/random_walk/Makefile b/src/sampling_random_walk/Makefile similarity index 87% rename from src/sampling/random_walk/Makefile rename to src/sampling_random_walk/Makefile index db157ca..b589288 100644 --- a/src/sampling/random_walk/Makefile +++ b/src/sampling_random_walk/Makefile @@ -1,4 +1,4 @@ -include ../../common.mk +include ../common.mk #OBJS += verifier.o all: $(OBJS) khop_demo diff --git a/src/sampling/random_walk/khop_demo.cc b/src/sampling_random_walk/khop_demo.cc similarity index 100% rename from src/sampling/random_walk/khop_demo.cc rename to src/sampling_random_walk/khop_demo.cc From 5d3a068ba1d93f208d647c2a5cbede468700ee83 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 20 Mar 2024 19:43:37 -0400 Subject: [PATCH 03/28] Add missing functions --- include/khop.h | 42 +++++++++++++++++++++++++++ src/sampling_random_walk/khop_demo.cc | 4 +-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/khop.h b/include/khop.h index 56e1367..ab8b14a 100644 --- a/include/khop.h +++ b/include/khop.h @@ -8,6 +8,48 @@ std::mt19937 gen(time(nullptr)); // std::default_random_engine generator; // 
std::uniform_real_distribution distribution(0.0,1.0); +/** + * Content below from Miranda Cai + */ + +/** + * Creates the initial seeds. + * + * @param seeds_size size of initial sample + * @param graph_size size of original graph (has to be at least seeds_size) + * @return vector containing node ids + */ +inline vector get_initial_transits(vidType seeds_size, vidType graph_size) +{ + // set node_ids; + // vector n_ids; + // while (node_ids.size() < seeds_size) { + // auto sample_id = gen() % graph_size; + // node_ids.insert(sample_id); + // } + // n_ids.insert(n_ids.end(), node_ids.begin(), node_ids.end()); + vector n_ids; + for (int i = 0; i < seeds_size; i++) + { + n_ids.push_back(gen() % graph_size); + } + return n_ids; +} + +/** + * For given step, return number of samples to take. Step of -1 for original sample transits + */ +inline int sample_size(int step) +{ + if (step == -1) + return 1; + if (step == 0) + return 25; + return 10; + // if (step == -1) return 2; + // return 2; +} + inline vidType sample_next_vbyte(Graph &g, vidType transit) { auto adj_transit = g.N_vbyte(transit, "streamvbyte"); diff --git a/src/sampling_random_walk/khop_demo.cc b/src/sampling_random_walk/khop_demo.cc index dd548fb..b885f33 100644 --- a/src/sampling_random_walk/khop_demo.cc +++ b/src/sampling_random_walk/khop_demo.cc @@ -7,7 +7,7 @@ void kHopSolver(Graph &g, int n_samples, int n_threads) vector inits = get_initial_transits(sample_size(-1) * n_samples, g.V()); int step_count = sample_size(-1) * n_samples; int total_count = step_count; - for (int step = 0; step < steps(); step++) + for (int step = 0; step < 3; step++) { step_count *= sample_size(step); total_count += step_count; @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) << std::endl; std::cout << "Begin sampling compressed graph..." << std::endl; - int n_samples = argc >= 4 ? atoi(argv[3]) : num_samples(); + int n_samples = argc >= 4 ? atoi(argv[3]) : 40000; int n_threads = argc >= 5 ? 
atoi(argv[4]) : 1; kHopSolver(g, n_samples, n_threads); return 0; From 6fc0e2ad9cc3e53c52712fbc9b624d40316ba3c7 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 20 Mar 2024 19:45:30 -0400 Subject: [PATCH 04/28] Bring steps function over --- include/khop.h | 5 +++++ src/sampling_random_walk/khop_demo.cc | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/khop.h b/include/khop.h index ab8b14a..309a4cd 100644 --- a/include/khop.h +++ b/include/khop.h @@ -12,6 +12,11 @@ std::mt19937 gen(time(nullptr)); * Content below from Miranda Cai */ +inline int steps() +{ + return 3; +} + /** * Creates the initial seeds. * diff --git a/src/sampling_random_walk/khop_demo.cc b/src/sampling_random_walk/khop_demo.cc index b885f33..760e674 100644 --- a/src/sampling_random_walk/khop_demo.cc +++ b/src/sampling_random_walk/khop_demo.cc @@ -7,7 +7,7 @@ void kHopSolver(Graph &g, int n_samples, int n_threads) vector inits = get_initial_transits(sample_size(-1) * n_samples, g.V()); int step_count = sample_size(-1) * n_samples; int total_count = step_count; - for (int step = 0; step < 3; step++) + for (int step = 0; step < steps(); step++) { step_count *= sample_size(step); total_count += step_count; From e8512d433eda6123523c1a329f1773f0b4e9d72b Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 22 Mar 2024 10:00:37 -0400 Subject: [PATCH 05/28] Add initial random walk implementation --- src/sampling_random_walk/random_walk_demo.cc | 66 ++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 src/sampling_random_walk/random_walk_demo.cc diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_demo.cc new file mode 100644 index 0000000..887669a --- /dev/null +++ b/src/sampling_random_walk/random_walk_demo.cc @@ -0,0 +1,66 @@ +#include "graph.h" +#include "compressor.hh" +#include "khop.h" + +void rWalkSolver(Graph &g, int n_samples, int n_threads) +{ + vector inits = get_initial_transits(sample_size(-1) * 
n_samples, g.V()); + int total_count = (steps() + 1) * n_samples; + + std::vector transits(total_count, 0); + for (int i = 0; i < inits.size(); i++) + { + transits[i] = inits[i]; + } + std::cout << "...initialized starting transits..." << std::endl; + + Timer t; + t.Start(); + // sample for defined number of steps + + // sampling length is set to `steps()` for all samples + for (int step = 0; step < steps(); step++) + { + std::cout << "STEP " << step << std::endl; + + // sample every new transit in the step for every sample group + for (int sample_i = 0; sample_i < n_samples; sample_i++) + { + + vidType sample_transit = transits[step * n_samples + sample_i]; + + /// USE THIS? + // if (old_t == (numeric_limits::max)()) + // { + // transits[t_idx] = (numeric_limits::max)(); + // continue; + // } + + vidType new_t = sample_next_vbyte(g, sample_transit); + transits[step * (n_samples + 1) + sample_i] = new_t; + } + } + t.Stop(); + std::cout << "result size: " << total_count << std::endl; + std::cout << "Finished sampling in " << t.Seconds() << " sec" << std::endl; +} + +int main(int argc, char *argv[]) +{ + Graph g; + std::string in_prefix = argv[1]; + std::string out_prefix = argv[2]; + std::string scheme = "streamvbyte"; + bool permutated = false; + // save_compressed_graph(in_prefix, out_prefix); + g.load_compressed_graph(out_prefix, scheme, permutated); + g.print_meta_data(); + std::cout << "LOADED COMPRESSED GRAPH\n" + << std::endl; + + std::cout << "Begin sampling compressed graph..." << std::endl; + int n_samples = argc >= 4 ? atoi(argv[3]) : 40000; + int n_threads = argc >= 5 ? 
atoi(argv[4]) : 1; + rWalkSolver(g, n_samples, n_threads); + return 0; +} From 94836ed3b9c399ca4b758cb02f842943c7cac5ad Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 22 Mar 2024 10:15:10 -0400 Subject: [PATCH 06/28] Overwrite OBJS --- src/sampling_random_walk/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sampling_random_walk/Makefile b/src/sampling_random_walk/Makefile index b589288..b9f4fef 100644 --- a/src/sampling_random_walk/Makefile +++ b/src/sampling_random_walk/Makefile @@ -1,5 +1,7 @@ include ../common.mk #OBJS += verifier.o +OBJS = graph.o VertexSet.o + all: $(OBJS) khop_demo khop_demo: khop_demo.o $(OBJS) From 1f8ae1cfaed1316dcc2c027ab37c1480dce408e9 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 5 Apr 2024 09:08:00 -0400 Subject: [PATCH 07/28] Disable main in compressor --- src/structure/compressor.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/structure/compressor.cc b/src/structure/compressor.cc index 2700ce4..343a9a3 100644 --- a/src/structure/compressor.cc +++ b/src/structure/compressor.cc @@ -255,6 +255,7 @@ void printusage() { << " [-a alignment(0)]\n"; } +# if 0 int main(int argc,char *argv[]) { int zeta_k = 2, permutate = 0, degree_threshold = 32; int alignment = 0; // 0: not aligned; 1: byte aligned; 2: word aligned @@ -348,3 +349,4 @@ int main(int argc,char *argv[]) { std::cout << "compression completed!\n"; return 0; } +#endif \ No newline at end of file From 4e94ea4c9702a426d8aac15d7a4fa53215005e97 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 5 Apr 2024 09:08:06 -0400 Subject: [PATCH 08/28] Fix make file for khop demo --- src/sampling_random_walk/Makefile | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/sampling_random_walk/Makefile b/src/sampling_random_walk/Makefile index b9f4fef..d06f25d 100644 --- a/src/sampling_random_walk/Makefile +++ b/src/sampling_random_walk/Makefile @@ -1,11 +1,23 @@ include ../common.mk -#OBJS += verifier.o OBJS = 
graph.o VertexSet.o +CGOBJS = graph_compressed.o cgr_decoder.o vbyte_decoder.o +CGCUOBJS = graph_gpu_compressed.o cgr_decoder_gpu.o +COBJS = cgr_encoder.o unary_encoder.o vbyte_encoder.o +NVFLAGS += -dc +CXXFLAGS += -Wno-narrowing +#LIBS += $(SIMDCAI_LIB) +NVINCLUDES += -I./gpu_kernels +INCLUDES +=-I$(CILK_CLANG)/include/cilk +vpath %.cc ../common +vpath %.cc ../structure +vpath %.cu ../structure +# VPATH+=../common +BIN = ../../bin all: $(OBJS) khop_demo -khop_demo: khop_demo.o $(OBJS) - $(CXX) $(CXXFLAGS) $(INCLUDES) khop_demo.o $(OBJS) -o $@ -lgomp +khop_demo: $(OBJS) $(CGOBJS) $(COBJS) compressor.o khop_demo.o + $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o khop_demo.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) mv $@ $(BIN) clean: From 73bef21943c5191c2d94b7a41f910afc895055cb Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 5 Apr 2024 09:36:05 -0400 Subject: [PATCH 09/28] Fix indexing in rwalk sample --- src/sampling_random_walk/random_walk_demo.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_demo.cc index 887669a..217aa1f 100644 --- a/src/sampling_random_walk/random_walk_demo.cc +++ b/src/sampling_random_walk/random_walk_demo.cc @@ -37,7 +37,7 @@ void rWalkSolver(Graph &g, int n_samples, int n_threads) // } vidType new_t = sample_next_vbyte(g, sample_transit); - transits[step * (n_samples + 1) + sample_i] = new_t; + transits[(step + 1) * n_samples + sample_i] = new_t; } } t.Stop(); From d75d13e2d49b51ffaa3ecf199151d816bd077885 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 5 Apr 2024 09:36:22 -0400 Subject: [PATCH 10/28] Remove old note with comment --- src/sampling_random_walk/random_walk_demo.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_demo.cc index 217aa1f..6e2c448 100644 --- a/src/sampling_random_walk/random_walk_demo.cc +++ 
b/src/sampling_random_walk/random_walk_demo.cc @@ -29,13 +29,6 @@ void rWalkSolver(Graph &g, int n_samples, int n_threads) vidType sample_transit = transits[step * n_samples + sample_i]; - /// USE THIS? - // if (old_t == (numeric_limits::max)()) - // { - // transits[t_idx] = (numeric_limits::max)(); - // continue; - // } - vidType new_t = sample_next_vbyte(g, sample_transit); transits[(step + 1) * n_samples + sample_i] = new_t; } From 5334ea2c26c52a060ec9b014ce548337124961a2 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 5 Apr 2024 09:36:34 -0400 Subject: [PATCH 11/28] Add debug print --- src/sampling_random_walk/random_walk_demo.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_demo.cc index 6e2c448..e5d0c69 100644 --- a/src/sampling_random_walk/random_walk_demo.cc +++ b/src/sampling_random_walk/random_walk_demo.cc @@ -28,6 +28,7 @@ void rWalkSolver(Graph &g, int n_samples, int n_threads) { vidType sample_transit = transits[step * n_samples + sample_i]; + // std::cout << "sample_transit: at " << step << " " << sample_i << " " << sample_transit << std::endl; vidType new_t = sample_next_vbyte(g, sample_transit); transits[(step + 1) * n_samples + sample_i] = new_t; From a08eced0ce38411a3d57f9ec6366c05061b158d4 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Fri, 5 Apr 2024 09:36:53 -0400 Subject: [PATCH 12/28] Add `rwalk_demo` make item --- src/sampling_random_walk/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sampling_random_walk/Makefile b/src/sampling_random_walk/Makefile index d06f25d..6d76cbf 100644 --- a/src/sampling_random_walk/Makefile +++ b/src/sampling_random_walk/Makefile @@ -20,5 +20,10 @@ khop_demo: $(OBJS) $(CGOBJS) $(COBJS) compressor.o khop_demo.o $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o khop_demo.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) mv $@ $(BIN) +rwalk_demo: $(OBJS) $(CGOBJS) $(COBJS) compressor.o random_walk_demo.o + 
$(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o random_walk_demo.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) + mv $@ $(BIN) + + clean: rm *.o From e247ef0f53809d7f53322c50602e80443aade004 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 10 Apr 2024 08:59:20 -0400 Subject: [PATCH 13/28] Comment out print from inside loop --- src/sampling_random_walk/random_walk_demo.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_demo.cc index e5d0c69..43cea8d 100644 --- a/src/sampling_random_walk/random_walk_demo.cc +++ b/src/sampling_random_walk/random_walk_demo.cc @@ -21,7 +21,7 @@ void rWalkSolver(Graph &g, int n_samples, int n_threads) // sampling length is set to `steps()` for all samples for (int step = 0; step < steps(); step++) { - std::cout << "STEP " << step << std::endl; + // std::cout << "STEP " << step << std::endl; // sample every new transit in the step for every sample group for (int sample_i = 0; sample_i < n_samples; sample_i++) From ef5d7d6ff3444f4abc253f100f704d7fe80c8911 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 10 Apr 2024 09:17:01 -0400 Subject: [PATCH 14/28] Simplify arguments --- src/sampling_random_walk/random_walk_demo.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_demo.cc index 43cea8d..b6faad6 100644 --- a/src/sampling_random_walk/random_walk_demo.cc +++ b/src/sampling_random_walk/random_walk_demo.cc @@ -43,18 +43,16 @@ int main(int argc, char *argv[]) { Graph g; std::string in_prefix = argv[1]; - std::string out_prefix = argv[2]; std::string scheme = "streamvbyte"; bool permutated = false; - // save_compressed_graph(in_prefix, out_prefix); - g.load_compressed_graph(out_prefix, scheme, permutated); + g.load_compressed_graph(in_prefix, scheme, permutated); g.print_meta_data(); std::cout << "LOADED COMPRESSED GRAPH\n" << 
std::endl; std::cout << "Begin sampling compressed graph..." << std::endl; - int n_samples = argc >= 4 ? atoi(argv[3]) : 40000; - int n_threads = argc >= 5 ? atoi(argv[4]) : 1; + int n_samples = argc >= 3 ? atoi(argv[2]) : 40000; + int n_threads = argc >= 4 ? atoi(argv[3]) : 1; rWalkSolver(g, n_samples, n_threads); return 0; } From 589ee7f6155ed7356710b037e58bac5c59c34d1f Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 10 Apr 2024 09:17:07 -0400 Subject: [PATCH 15/28] Add run example --- src/sampling_random_walk/README.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/sampling_random_walk/README.md diff --git a/src/sampling_random_walk/README.md b/src/sampling_random_walk/README.md new file mode 100644 index 0000000..180b46b --- /dev/null +++ b/src/sampling_random_walk/README.md @@ -0,0 +1,4 @@ +``` +make rwalk_demo && ../../bin/rwalk_demo /home/lgaitskell/data-xhchen/tester/vbyte 40000 + +``` From 526d0f53be974827061c9a18defc9c6ee0ab6a62 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 10 Apr 2024 09:20:34 -0400 Subject: [PATCH 16/28] Allow configuring sample steps from arg --- src/sampling_random_walk/README.md | 3 +-- src/sampling_random_walk/random_walk_demo.cc | 15 ++++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sampling_random_walk/README.md b/src/sampling_random_walk/README.md index 180b46b..0bdadc0 100644 --- a/src/sampling_random_walk/README.md +++ b/src/sampling_random_walk/README.md @@ -1,4 +1,3 @@ ``` -make rwalk_demo && ../../bin/rwalk_demo /home/lgaitskell/data-xhchen/tester/vbyte 40000 - +make rwalk_demo && ../../bin/rwalk_demo /home/lgaitskell/data-xhchen/tester/vbyte 3 40 ``` diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_demo.cc index b6faad6..5189ec4 100644 --- a/src/sampling_random_walk/random_walk_demo.cc +++ b/src/sampling_random_walk/random_walk_demo.cc @@ -2,10 +2,10 @@ #include "compressor.hh" #include "khop.h" -void 
rWalkSolver(Graph &g, int n_samples, int n_threads) +void rWalkSolver(Graph &g, int sample_steps, int n_samples, int n_threads) { vector inits = get_initial_transits(sample_size(-1) * n_samples, g.V()); - int total_count = (steps() + 1) * n_samples; + int total_count = (sample_steps + 1) * n_samples; std::vector transits(total_count, 0); for (int i = 0; i < inits.size(); i++) @@ -18,8 +18,8 @@ void rWalkSolver(Graph &g, int n_samples, int n_threads) t.Start(); // sample for defined number of steps - // sampling length is set to `steps()` for all samples - for (int step = 0; step < steps(); step++) + // sampling length is set to `sample_steps` for all samples + for (int step = 0; step < sample_steps; step++) { // std::cout << "STEP " << step << std::endl; @@ -50,9 +50,10 @@ int main(int argc, char *argv[]) std::cout << "LOADED COMPRESSED GRAPH\n" << std::endl; + int sample_steps = atoi(argv[2]); + int n_samples = argc >= 4 ? atoi(argv[3]) : 40000; + int n_threads = argc >= 5 ? atoi(argv[4]) : 1; std::cout << "Begin sampling compressed graph..." << std::endl; - int n_samples = argc >= 3 ? atoi(argv[2]) : 40000; - int n_threads = argc >= 4 ? 
atoi(argv[3]) : 1; - rWalkSolver(g, n_samples, n_threads); + rWalkSolver(g, sample_steps, n_samples, n_threads); return 0; } From 4f87d2aa9e818173c2a98087fe37a8b6c1fbe1fa Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 10 Apr 2024 10:11:55 -0400 Subject: [PATCH 17/28] Rename demo to cpu --- src/sampling_random_walk/Makefile | 4 ++-- src/sampling_random_walk/README.md | 2 +- .../{random_walk_demo.cc => random_walk_cpu.cc} | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename src/sampling_random_walk/{random_walk_demo.cc => random_walk_cpu.cc} (100%) diff --git a/src/sampling_random_walk/Makefile b/src/sampling_random_walk/Makefile index 6d76cbf..c470b20 100644 --- a/src/sampling_random_walk/Makefile +++ b/src/sampling_random_walk/Makefile @@ -20,8 +20,8 @@ khop_demo: $(OBJS) $(CGOBJS) $(COBJS) compressor.o khop_demo.o $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o khop_demo.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) mv $@ $(BIN) -rwalk_demo: $(OBJS) $(CGOBJS) $(COBJS) compressor.o random_walk_demo.o - $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o random_walk_demo.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) +rwalk_cpu: $(OBJS) $(CGOBJS) $(COBJS) compressor.o random_walk_demo.o + $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o random_walk_cpu.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) mv $@ $(BIN) diff --git a/src/sampling_random_walk/README.md b/src/sampling_random_walk/README.md index 0bdadc0..d141ebe 100644 --- a/src/sampling_random_walk/README.md +++ b/src/sampling_random_walk/README.md @@ -1,3 +1,3 @@ ``` -make rwalk_demo && ../../bin/rwalk_demo /home/lgaitskell/data-xhchen/tester/vbyte 3 40 +make rwalk_cpu && ../../bin/rwalk_cpu /home/lgaitskell/data-xhchen/tester/vbyte 3 40 ``` diff --git a/src/sampling_random_walk/random_walk_demo.cc b/src/sampling_random_walk/random_walk_cpu.cc similarity index 100% rename from src/sampling_random_walk/random_walk_demo.cc rename to src/sampling_random_walk/random_walk_cpu.cc From 09ef8c6b8815ed2215fc27d5498be72a9118e72e 
Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Wed, 10 Apr 2024 10:28:19 -0400 Subject: [PATCH 18/28] Fix missed name update --- src/sampling_random_walk/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sampling_random_walk/Makefile b/src/sampling_random_walk/Makefile index c470b20..2413aff 100644 --- a/src/sampling_random_walk/Makefile +++ b/src/sampling_random_walk/Makefile @@ -20,7 +20,7 @@ khop_demo: $(OBJS) $(CGOBJS) $(COBJS) compressor.o khop_demo.o $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o khop_demo.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) mv $@ $(BIN) -rwalk_cpu: $(OBJS) $(CGOBJS) $(COBJS) compressor.o random_walk_demo.o +rwalk_cpu: $(OBJS) $(CGOBJS) $(COBJS) compressor.o random_walk_cpu.o $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o random_walk_cpu.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) mv $@ $(BIN) From 14a4bfda089f3099819036cc77f2980e0f1d3367 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 21 Apr 2024 11:21:34 -0400 Subject: [PATCH 19/28] Fix dead-ends in sampling --- src/sampling_random_walk/random_walk_cpu.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/sampling_random_walk/random_walk_cpu.cc b/src/sampling_random_walk/random_walk_cpu.cc index 5189ec4..dfa63c1 100644 --- a/src/sampling_random_walk/random_walk_cpu.cc +++ b/src/sampling_random_walk/random_walk_cpu.cc @@ -29,8 +29,16 @@ void rWalkSolver(Graph &g, int sample_steps, int n_samples, int n_threads) vidType sample_transit = transits[step * n_samples + sample_i]; // std::cout << "sample_transit: at " << step << " " << sample_i << " " << sample_transit << std::endl; + vidType new_t; + if (sample_transit == (numeric_limits::max)()) + { + new_t = sample_transit; + } + else + { + new_t = sample_next_vbyte(g, sample_transit); + } - vidType new_t = sample_next_vbyte(g, sample_transit); transits[(step + 1) * n_samples + sample_i] = new_t; } } From ea6c3927451e5e68dbf7239d0cbb4f4c12e122ac Mon Sep 17 00:00:00 2001 From: 
Luc Gaitskell Date: Sun, 21 Apr 2024 11:32:55 -0400 Subject: [PATCH 20/28] Add initial random walk omp implementation --- src/sampling_random_walk/Makefile | 4 ++ src/sampling_random_walk/random_walk_omp.cc | 78 +++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 src/sampling_random_walk/random_walk_omp.cc diff --git a/src/sampling_random_walk/Makefile b/src/sampling_random_walk/Makefile index 2413aff..524a9c9 100644 --- a/src/sampling_random_walk/Makefile +++ b/src/sampling_random_walk/Makefile @@ -24,6 +24,10 @@ rwalk_cpu: $(OBJS) $(CGOBJS) $(COBJS) compressor.o random_walk_cpu.o $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o random_walk_cpu.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) mv $@ $(BIN) +rwalk_omp: $(OBJS) $(CGOBJS) $(COBJS) compressor.o random_walk_omp.o + $(CXX) $(CXXFLAGS) $(INCLUDES) compressor.o random_walk_omp.o $(OBJS) $(CGOBJS) $(COBJS) -o $@ $(LIBS) + mv $@ $(BIN) + clean: rm *.o diff --git a/src/sampling_random_walk/random_walk_omp.cc b/src/sampling_random_walk/random_walk_omp.cc new file mode 100644 index 0000000..d176402 --- /dev/null +++ b/src/sampling_random_walk/random_walk_omp.cc @@ -0,0 +1,78 @@ +#include +#include "graph.h" +#include "compressor.hh" +#include "khop.h" + +void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) +{ + int num_threads = 1; + omp_set_num_threads(n_threads); +#pragma omp parallel + { + num_threads = omp_get_num_threads(); + } + vector inits = get_initial_transits(sample_size(-1) * n_samples, g.V()); + int total_count = (sample_steps + 1) * n_samples; + + std::vector transits(total_count, 0); + for (int i = 0; i < inits.size(); i++) + { + transits[i * sample_steps] = inits[i]; + } + std::cout << "...initialized starting transits..." 
<< std::endl; + + Timer t; + t.Start(); + // sample for defined number of steps + + // sampling length is set to `sample_steps` for all samples + + for (int step = 0; step < sample_steps; step++) + { + // std::cout << "STEP " << step << std::endl; + + // sample every new transit in the step for every sample group +#pragma omp parallel for + for (int sample_i = 0; sample_i < n_samples; sample_i++) + { + + vidType sample_transit = transits[step + sample_i * sample_steps]; + // std::cout << "sample_transit: at " << step << " " << sample_i << " " << sample_transit << std::endl; + + vidType new_t; + if (sample_transit == (numeric_limits::max)()) + { + new_t = sample_transit; + } + else + { + new_t = sample_next_vbyte(g, sample_transit); + } + + transits[step + 1 + sample_i * sample_steps] = new_t; + } + } + t.Stop(); + std::cout << "result size: " << total_count << std::endl; + std::cout << "Finished sampling in " << t.Seconds() << " sec" << std::endl; +} + +int main(int argc, char *argv[]) +{ + Graph g; + std::string in_prefix = argv[1]; + std::string scheme = "streamvbyte"; + bool permutated = false; + g.load_graph(in_prefix); + g.load_compressed_graph(in_prefix, scheme, permutated); + g.print_meta_data(); + std::cout << "LOADED COMPRESSED GRAPH\n" + << std::endl; + + int sample_steps = atoi(argv[2]); + int n_samples = argc >= 4 ? atoi(argv[3]) : 40000; + int n_threads = argc >= 5 ? atoi(argv[4]) : 1; + std::cout << "Begin OpenMP sampling compressed graph..." 
<< std::endl; + rWalkOMPSolver(g, sample_steps, n_samples, n_threads); + return 0; +} From b032f2b5687d4d9e09ea96c8fd776386f56e3ed9 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 21 Apr 2024 11:33:31 -0400 Subject: [PATCH 21/28] Add more run demos to README --- src/sampling_random_walk/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sampling_random_walk/README.md b/src/sampling_random_walk/README.md index d141ebe..847659c 100644 --- a/src/sampling_random_walk/README.md +++ b/src/sampling_random_walk/README.md @@ -1,3 +1,7 @@ ``` make rwalk_cpu && ../../bin/rwalk_cpu /home/lgaitskell/data-xhchen/tester/vbyte 3 40 + +make rwalk_omp && ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/tester/vbyte 3 40 + +make rwalk_omp && ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/livej/dag-streamvbyte 30 4000000 ``` From e885deea4c79750f7d84928992303dd047a9ddb5 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 21 Apr 2024 11:51:47 -0400 Subject: [PATCH 22/28] Fix khop uncompressed sample next --- include/khop.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/khop.h b/include/khop.h index 309a4cd..be00e0d 100644 --- a/include/khop.h +++ b/include/khop.h @@ -67,12 +67,13 @@ inline vidType sample_next_vbyte(Graph &g, vidType transit) return adj_transit.data()[idx]; } -inline vidType sample_next(Graph &g, vidType transit, vidType src_degree, int step) +inline vidType sample_next(Graph &g, vidType transit) { if (transit == (numeric_limits::max)()) { return (numeric_limits::max)(); } + vidType src_degree = g.N(transit).size(); if (src_degree == 0) { return (numeric_limits::max)(); From 81c6ab74cae00e7215dc6ff621926ffb9cc29592 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 5 May 2024 11:03:33 -0400 Subject: [PATCH 23/28] Change memory access to group steps --- src/sampling_random_walk/random_walk_omp.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git 
a/src/sampling_random_walk/random_walk_omp.cc b/src/sampling_random_walk/random_walk_omp.cc index d176402..d7cadcf 100644 --- a/src/sampling_random_walk/random_walk_omp.cc +++ b/src/sampling_random_walk/random_walk_omp.cc @@ -17,7 +17,7 @@ void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) std::vector transits(total_count, 0); for (int i = 0; i < inits.size(); i++) { - transits[i * sample_steps] = inits[i]; + transits[i] = inits[i]; } std::cout << "...initialized starting transits..." << std::endl; @@ -27,16 +27,16 @@ void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) // sampling length is set to `sample_steps` for all samples - for (int step = 0; step < sample_steps; step++) - { - // std::cout << "STEP " << step << std::endl; // sample every new transit in the step for every sample group #pragma omp parallel for - for (int sample_i = 0; sample_i < n_samples; sample_i++) + for (int sample_i = 0; sample_i < n_samples; sample_i++) + { + for (int step = 0; step < sample_steps; step++) { + // std::cout << "STEP " << step << std::endl; - vidType sample_transit = transits[step + sample_i * sample_steps]; + vidType sample_transit = transits[step * n_samples + sample_i]; // std::cout << "sample_transit: at " << step << " " << sample_i << " " << sample_transit << std::endl; vidType new_t; @@ -49,7 +49,7 @@ void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) new_t = sample_next_vbyte(g, sample_transit); } - transits[step + 1 + sample_i * sample_steps] = new_t; + transits[(step + 1) * n_samples + sample_i] = new_t; } } t.Stop(); From b279448f3156a7e77c59f0888811b6dc2e6fa077 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 5 May 2024 11:17:19 -0400 Subject: [PATCH 24/28] FIx parallelization with isolated random gens --- include/khop.h | 8 ++--- src/sampling_random_walk/random_walk_cpu.cc | 2 +- src/sampling_random_walk/random_walk_omp.cc | 39 ++++++++++++--------- 3 files changed, 27 
insertions(+), 22 deletions(-) diff --git a/include/khop.h b/include/khop.h index be00e0d..ae24b7d 100644 --- a/include/khop.h +++ b/include/khop.h @@ -4,7 +4,7 @@ #include "utils.h" #include "graph.h" #include -std::mt19937 gen(time(nullptr)); +std::mt19937 gen_global(time(nullptr)); // std::default_random_engine generator; // std::uniform_real_distribution distribution(0.0,1.0); @@ -36,7 +36,7 @@ inline vector get_initial_transits(vidType seeds_size, vidType graph_si vector n_ids; for (int i = 0; i < seeds_size; i++) { - n_ids.push_back(gen() % graph_size); + n_ids.push_back(gen_global() % graph_size); } return n_ids; } @@ -55,7 +55,7 @@ inline int sample_size(int step) // return 2; } -inline vidType sample_next_vbyte(Graph &g, vidType transit) +inline vidType sample_next_vbyte(Graph &g, vidType transit, std::mt19937 &gen) { auto adj_transit = g.N_vbyte(transit, "streamvbyte"); vidType src_degree = adj_transit.size(); @@ -67,7 +67,7 @@ inline vidType sample_next_vbyte(Graph &g, vidType transit) return adj_transit.data()[idx]; } -inline vidType sample_next(Graph &g, vidType transit) +inline vidType sample_next(Graph &g, vidType transit, std::mt19937 &gen) { if (transit == (numeric_limits::max)()) { diff --git a/src/sampling_random_walk/random_walk_cpu.cc b/src/sampling_random_walk/random_walk_cpu.cc index dfa63c1..72c3076 100644 --- a/src/sampling_random_walk/random_walk_cpu.cc +++ b/src/sampling_random_walk/random_walk_cpu.cc @@ -36,7 +36,7 @@ void rWalkSolver(Graph &g, int sample_steps, int n_samples, int n_threads) } else { - new_t = sample_next_vbyte(g, sample_transit); + new_t = sample_next_vbyte(g, sample_transit, gen_global); } transits[(step + 1) * n_samples + sample_i] = new_t; diff --git a/src/sampling_random_walk/random_walk_omp.cc b/src/sampling_random_walk/random_walk_omp.cc index d7cadcf..60632d1 100644 --- a/src/sampling_random_walk/random_walk_omp.cc +++ b/src/sampling_random_walk/random_walk_omp.cc @@ -27,29 +27,34 @@ void rWalkOMPSolver(Graph &g, 
int sample_steps, int n_samples, int n_threads) // sampling length is set to `sample_steps` for all samples +#pragma omp parallel + { + int t_idx = omp_get_thread_num(); + std::mt19937 gen(t_idx); // sample every new transit in the step for every sample group -#pragma omp parallel for - for (int sample_i = 0; sample_i < n_samples; sample_i++) - { - for (int step = 0; step < sample_steps; step++) +#pragma omp for // schedule(dynamic) // schedule(static) num_threads(8) + for (int sample_i = 0; sample_i < n_samples; sample_i++) { - // std::cout << "STEP " << step << std::endl; + for (int step = 0; step < sample_steps; step++) + { + // std::cout << "STEP " << step << std::endl; - vidType sample_transit = transits[step * n_samples + sample_i]; - // std::cout << "sample_transit: at " << step << " " << sample_i << " " << sample_transit << std::endl; + vidType sample_transit = transits[step * n_samples + sample_i]; + // std::cout << "sample_transit: at " << step << " " << sample_i << " " << sample_transit << std::endl; - vidType new_t; - if (sample_transit == (numeric_limits::max)()) - { - new_t = sample_transit; - } - else - { - new_t = sample_next_vbyte(g, sample_transit); - } + vidType new_t; + if (sample_transit == (numeric_limits::max)()) + { + new_t = sample_transit; + } + else + { + new_t = sample_next_vbyte(g, sample_transit, gen); + } - transits[(step + 1) * n_samples + sample_i] = new_t; + transits[(step + 1) * n_samples + sample_i] = new_t; + } } } t.Stop(); From e4e745b06293cbc731ea0e2a8c21874753fbe4df Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 5 May 2024 11:18:03 -0400 Subject: [PATCH 25/28] Add parallelization to initial transit setup --- src/sampling_random_walk/random_walk_omp.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sampling_random_walk/random_walk_omp.cc b/src/sampling_random_walk/random_walk_omp.cc index 60632d1..d07f880 100644 --- a/src/sampling_random_walk/random_walk_omp.cc +++ b/src/sampling_random_walk/random_walk_omp.cc 
@@ -15,6 +15,7 @@ void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) int total_count = (sample_steps + 1) * n_samples; std::vector transits(total_count, 0); +#pragma omp parallel for for (int i = 0; i < inits.size(); i++) { transits[i] = inits[i]; From 9d92c06ba0b54e38099f7cc86317d8afa031e03c Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 5 May 2024 11:58:54 -0400 Subject: [PATCH 26/28] Support specifying compression type --- src/sampling_random_walk/random_walk_cpu.cc | 47 ++++++++++++++----- src/sampling_random_walk/random_walk_omp.cc | 51 +++++++++++++++------ 2 files changed, 73 insertions(+), 25 deletions(-) diff --git a/src/sampling_random_walk/random_walk_cpu.cc b/src/sampling_random_walk/random_walk_cpu.cc index 72c3076..594ef47 100644 --- a/src/sampling_random_walk/random_walk_cpu.cc +++ b/src/sampling_random_walk/random_walk_cpu.cc @@ -2,7 +2,7 @@ #include "compressor.hh" #include "khop.h" -void rWalkSolver(Graph &g, int sample_steps, int n_samples, int n_threads) +void rWalkSolver(Graph &g, bool vbyte, int sample_steps, int n_samples) { vector inits = get_initial_transits(sample_size(-1) * n_samples, g.V()); int total_count = (sample_steps + 1) * n_samples; @@ -36,7 +36,14 @@ void rWalkSolver(Graph &g, int sample_steps, int n_samples, int n_threads) } else { - new_t = sample_next_vbyte(g, sample_transit, gen_global); + if (vbyte) + { + new_t = sample_next_vbyte(g, sample_transit, gen_global); + } + else + { + new_t = sample_next(g, sample_transit, gen_global); + } } transits[(step + 1) * n_samples + sample_i] = new_t; @@ -51,17 +58,33 @@ int main(int argc, char *argv[]) { Graph g; std::string in_prefix = argv[1]; - std::string scheme = "streamvbyte"; - bool permutated = false; - g.load_compressed_graph(in_prefix, scheme, permutated); + + std::string scheme = argv[2]; + bool vbyte = (scheme == "streamvbyte"); + if (scheme == "streamvbyte") + { + std::string scheme = "streamvbyte"; + bool permutated = false; + 
g.load_compressed_graph(in_prefix, scheme, permutated); + std::cout << "Loaded COMPRESSED Graph\n" + << std::endl; + } + else if (scheme == "uncompressed") + { + g.load_graph(in_prefix); + std::cout << "Loaded UNcompressed Graph\n" + << std::endl; + } + else + { + std::cout << "Incorrect or no scheme specified\n" + << std::endl; + exit(1); + } g.print_meta_data(); - std::cout << "LOADED COMPRESSED GRAPH\n" - << std::endl; - int sample_steps = atoi(argv[2]); - int n_samples = argc >= 4 ? atoi(argv[3]) : 40000; - int n_threads = argc >= 5 ? atoi(argv[4]) : 1; - std::cout << "Begin sampling compressed graph..." << std::endl; - rWalkSolver(g, sample_steps, n_samples, n_threads); + int sample_steps = atoi(argv[3]); + int n_samples = argc >= 4 ? atoi(argv[4]) : 40000; + rWalkSolver(g, vbyte, sample_steps, n_samples); return 0; } diff --git a/src/sampling_random_walk/random_walk_omp.cc b/src/sampling_random_walk/random_walk_omp.cc index d07f880..ad5490d 100644 --- a/src/sampling_random_walk/random_walk_omp.cc +++ b/src/sampling_random_walk/random_walk_omp.cc @@ -3,7 +3,7 @@ #include "compressor.hh" #include "khop.h" -void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) +void rWalkOMPSolver(Graph &g, bool vbyte, int sample_steps, int n_samples, int n_threads) { int num_threads = 1; omp_set_num_threads(n_threads); @@ -22,6 +22,8 @@ void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) } std::cout << "...initialized starting transits..." << std::endl; + std::cout << "Begin OpenMP sampling (" << num_threads << " threads)..." 
<< std::endl; + Timer t; t.Start(); // sample for defined number of steps @@ -51,7 +53,14 @@ void rWalkOMPSolver(Graph &g, int sample_steps, int n_samples, int n_threads) } else { - new_t = sample_next_vbyte(g, sample_transit, gen); + if (vbyte) + { + new_t = sample_next_vbyte(g, sample_transit, gen); + } + else + { + new_t = sample_next(g, sample_transit, gen); + } } transits[(step + 1) * n_samples + sample_i] = new_t; @@ -67,18 +76,34 @@ int main(int argc, char *argv[]) { Graph g; std::string in_prefix = argv[1]; - std::string scheme = "streamvbyte"; - bool permutated = false; - g.load_graph(in_prefix); - g.load_compressed_graph(in_prefix, scheme, permutated); + + std::string scheme = argv[2]; + bool vbyte = (scheme == "streamvbyte"); + if (scheme == "streamvbyte") + { + std::string scheme = "streamvbyte"; + bool permutated = false; + g.load_compressed_graph(in_prefix, scheme, permutated); + std::cout << "Loaded COMPRESSED Graph\n" + << std::endl; + } + else if (scheme == "uncompressed") + { + g.load_graph(in_prefix); + std::cout << "Loaded UNcompressed Graph\n" + << std::endl; + } + else + { + std::cout << "Incorrect or no scheme specified\n" + << std::endl; + exit(1); + } g.print_meta_data(); - std::cout << "LOADED COMPRESSED GRAPH\n" - << std::endl; - int sample_steps = atoi(argv[2]); - int n_samples = argc >= 4 ? atoi(argv[3]) : 40000; - int n_threads = argc >= 5 ? atoi(argv[4]) : 1; - std::cout << "Begin OpenMP sampling compressed graph..." << std::endl; - rWalkOMPSolver(g, sample_steps, n_samples, n_threads); + int sample_steps = atoi(argv[3]); + int n_samples = argc >= 4 ? atoi(argv[4]) : 40000; + int n_threads = argc >= 5 ? 
atoi(argv[5]) : 1; + rWalkOMPSolver(g, vbyte, sample_steps, n_samples, n_threads); return 0; } From c6a9a73f60d87d6dad8c559095a5e5cea1978f51 Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Sun, 5 May 2024 11:59:03 -0400 Subject: [PATCH 27/28] Improve demo scripts in readme --- src/sampling_random_walk/README.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/sampling_random_walk/README.md b/src/sampling_random_walk/README.md index 847659c..db585b1 100644 --- a/src/sampling_random_walk/README.md +++ b/src/sampling_random_walk/README.md @@ -1,7 +1,25 @@ +### Compressed: + ``` -make rwalk_cpu && ../../bin/rwalk_cpu /home/lgaitskell/data-xhchen/tester/vbyte 3 40 +make rwalk_cpu && ../../bin/rwalk_cpu /home/lgaitskell/data-xhchen/tester/vbyte streamvbyte 30 4000000 + +make rwalk_cpu && ../../bin/rwalk_cpu /home/lgaitskell/data-xhchen/livej/dag-streamvbyte streamvbyte 30 4000000 + +make rwalk_omp && ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/livej/dag-streamvbyte streamvbyte 30 4000000 16 +``` + +### Uncompressed: -make rwalk_omp && ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/tester/vbyte 3 40 +``` +make rwalk_cpu && ../../bin/rwalk_cpu /home/lgaitskell/data-xhchen/livej/graph uncompressed 30 4000000 +## takes 6.86327 sec +make rwalk_omp && ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/livej/graph uncompressed 30 4000000 16 +## takes 8.76526 sec +``` + +``` +interact -n 16 -p parallel -t 2:00:00 +cd scratch4/GraphAIBench/src/sampling_random_walk/ -make rwalk_omp && ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/livej/dag-streamvbyte 30 4000000 +make rwalk_omp && /home/lgaitskell/intel/oneapi/vtune/2024.1/bin64/vtune -collect hotspots ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/livej/graph uncompressed 30 4000000 16 ``` From c5b49f885da21b20301a6fb71fcad4e24fe512af Mon Sep 17 00:00:00 2001 From: Luc Gaitskell Date: Tue, 14 May 2024 18:15:56 -0400 Subject: [PATCH 28/28] Add list of datasets ---
src/sampling_random_walk/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/sampling_random_walk/README.md b/src/sampling_random_walk/README.md index db585b1..3128e2b 100644 --- a/src/sampling_random_walk/README.md +++ b/src/sampling_random_walk/README.md @@ -23,3 +23,16 @@ cd scratch4/GraphAIBench/src/sampling_random_walk/ make rwalk_omp && /home/lgaitskell/intel/oneapi/vtune/2024.1/bin64/vtune -collect hotspots ../../bin/rwalk_omp /home/lgaitskell/data-xhchen/livej/graph uncompressed 30 4000000 16 ``` + +Datasets: + +``` +~/data-xhchen/livej/graph +~/data-xhchen/orkut/graph +~/data-xhchen/twitter40/graph +~/data-xhchen/friendster/graph +~/data-xhchen/uk2007/graph +~/data-xhchen/gsh-2015/graph +~/data-xhchen/clueweb12/graph +~/data-xhchen/uk-2014-csgr/graph +```