faster implementation of erdos_renyi#150
faster implementation of erdos_renyi#150etiennedeg wants to merge 9 commits intoJuliaGraphs:masterfrom
Conversation
Codecov ReportAttention: Patch coverage is
Additional details and impacted files@@ Coverage Diff @@
## master #150 +/- ##
==========================================
+ Coverage 97.31% 97.36% +0.05%
==========================================
Files 120 120
Lines 6953 7175 +222
==========================================
+ Hits 6766 6986 +220
- Misses 187 189 +2 ☔ View full report in Codecov by Sentry. |
|
The problem this benchmark is, that you only benchmark if for The implementation we currently have in Graph.jl, can certainly speed up, and I think in some edges cases, there might even be error. But there are better algorithms than in this PR. I implemented something 4 years ago, but then got sidetracked, so I am not sure if it is correct, but I will attach that code, so you can take it or maybe use it for creating a better implementation. My memory is very shaky here, so there might be some errors:
And here is the code I once wrote, no guarantee on correctness: using LightGraphs
using Random
using Base: OneTo
function erdos_renyi_directed(nv::T, m::Integer; self_loops=false) where {T <: Integer}
nvi = Int(nv)
max_possible_edges = self_loops ? (nvi * nvi) : (nvi * (nvi - 1))
@assert 0 <= m <= max_possible_edges
fadjlist = Vector{Vector{T}}(undef, nvi)
taken = 0
remaining = max_possible_edges
u = 1
v = 1
buffer = Vector{T}(undef, nv)
buffer_len = 0
@inbounds while u <= nv
v += ifelse(!self_loops && u == v, 1, 0)
if v > nv
fadjlist[u] = buffer[OneTo(buffer_len)]
u += 1
v = 1
buffer_len = 0
end
if rand() * remaining < (m - taken)
buffer_len += 1
buffer[buffer_len] = v
taken += 1
end
v += 1
remaining -= 1
end
# reuse the memory allocated for buffer
indeg = buffer
fill!(indeg, zero(T))
@inbounds for u in OneTo(nv)
for v in fadjlist[u]
indeg[v] += 1
end
end
badjlist = Vector{Vector{T}}(undef, nvi)
@inbounds for v in OneTo(nv)
badjlist[v] = Vector{T}(undef, indeg[v])
end
@inbounds for u in OneTo(nv)
for v in fadjlist[u]
badjlist[v][end - indeg[v] + 1] = u
indeg[v] -= 1
end
end
return SimpleDiGraph(m, fadjlist, badjlist)
end
function erdos_renyi_directed(nv::T, p::Union{Rational, AbstractFloat, AbstractIrrational}; self_loops=false) where {T <: Integer}
@assert 0 <= p <= 1
fadjlist = Vector{Vector{T}}(undef, nv)
ne = 0
buffer = Vector{T}(undef, nv)
sample_from = self_loops ? collect(one(T):nv) : collect(T(2):nv)
@inbounds for u in OneTo(nv)
randsubseq!(buffer, sample_from, p)
fadjlist[u] = copy(buffer)
ne += length(buffer)
if !self_loops
sample_from[u] = u
end
end
# reuse the memory allocated for buffer
# need to resize! because randsubseq! could have shrunken the buffer length
indeg = resize!(buffer, nv)
fill!(indeg, zero(T))
@inbounds for u in OneTo(nv)
for v in fadjlist[u]
indeg[v] += 1
end
end
badjlist = Vector{Vector{T}}(undef, nv)
@inbounds for v in OneTo(nv)
badjlist[v] = Vector{T}(undef, indeg[v])
end
@inbounds for u in OneTo(nv)
for v in fadjlist[u]
badjlist[v][end - indeg[v] + 1] = u
indeg[v] -= 1
end
end
return SimpleDiGraph(ne, fadjlist, badjlist)
end
function erdos_renyi_undirected(nv::T, m::Integer; self_loops=false) where {T <: Integer}
nvi = Int(nv)
max_possible_edges = self_loops ? (nvi * (nvi + 1)) ÷ 2 : (nvi * (nvi - 1)) ÷ 2
@assert 0 <= m <= max_possible_edges
fadjlist = Vector{Vector{T}}(undef, nvi)
taken = 0
remaining = max_possible_edges
u = 1
v = self_loops ? one(T) : T(2)
buffer = Vector{T}(undef, nv)
buffer_len = 0
indeg = zeros(T, nv)
@inbounds while u <= nv
if v > nv
indeg_u = indeg[u]
list_u = Vector{T}(undef, indeg_u + buffer_len)
for i in OneTo(buffer_len)
w = buffer[i]
list_u[i + indeg_u] = w
indeg[w] += 1
end
fadjlist[u] = list_u
u += 1
v = ifelse(self_loops, u, u + 1)
buffer_len = 0
end
if rand() * remaining < (m - taken)
buffer_len += 1
buffer[buffer_len] = v
taken += 1
end
v += 1
remaining -= 1
end
insert_at = resize!(buffer, nv)
fill!(insert_at, one(T))
@inbounds for u in OneTo(nv)
list_u = fadjlist[u]
for i in (indeg[u] + 1):length(list_u)
v = list_u[i]
fadjlist[v][insert_at[v]] = u
insert_at[v] += 1
end
end
return SimpleGraph(m, fadjlist)
end
@inline function _sample_with_replacement!(buffer, sample_from, p, buffer_offset=0)
buffer_len = 0
if p >= 0.5
@inbounds for v in sample_from
if rand() < p
buffer_len += 1
buffer[buffer_len + buffer_offset] = v
end
end
else
L = -1 / log1p(-p)
sample_from_len = length(sample_from)
i = 0
@inbounds while true
i += floor(Int, randexp() * L) + 1
i > sample_from_len && return buffer_len
buffer_len += 1
buffer[buffer_len + buffer_offset] = sample_from[i]
end
end
return buffer_len
end
function erdos_renyi_undirected(nv::T, p::Union{Rational, AbstractFloat, AbstractIrrational}; self_loops=false) where {T <: Integer}
@assert 0 <= p <= 1
fadjlist = Vector{Vector{T}}(undef, nv)
ne = 0
buffer = Vector{T}(undef, nv)
indeg = zeros(T, nv)
@inbounds for u in OneTo(nv)
sample_from = self_loops ? (u:nv) : (u+one(T):nv)
buffer_len = _sample_with_replacement!(buffer, sample_from, p)
indeg_u = indeg[u]
list_u = Vector{T}(undef, indeg_u + buffer_len)
for i in OneTo(buffer_len)
v = buffer[i]
list_u[i + indeg_u] = v
indeg[v] += 1
end
fadjlist[u] = list_u
ne += buffer_len
end
insert_at = resize!(buffer, nv)
fill!(insert_at, one(T))
@inbounds for u in OneTo(nv)
list_u = fadjlist[u]
for i in (indeg[u] + 1):length(list_u)
v = list_u[i]
fadjlist[v][insert_at[v]] = u
insert_at[v] += 1
end
end
return SimpleGraph(ne, fadjlist)
end |
|
Oh ok, I see the point of using the binomial distribution. The biggest problem here is the use of |
|
I mean, if we have some benchmarks, that show that this implementation is also faster for very small and very large |
|
The following code implements the good strategy above for (n,p) graphs. If there's interest I can prepare a PR. function trianglemap(r)
j = floor(Int, (1+sqrt(8r-7))/2) + 1
i = r - binomial(j-1,2)
i, j
end
function nondiagmap(r,n)
j = div(r-1, n-1)
i = r - j*(n-1)
j += 1
i += (i >= j)
i, j
end
function erdos_renyi(
n::Integer,
p::Real;
is_directed=false,
rng::Union{Nothing,AbstractRNG}=nothing,
seed::Union{Nothing,Integer}=nothing,
)
rng = rng_from_rng_or_seed(rng, seed)
m = is_directed ? n*(n-1) : binomial(n,2)
R = randsubseq(rng, 1:m, min(1.0, p))
g = if is_directed
SimpleDiGraphFromIterator(Edge(nondiagmap(r,n)...) for r in R)
else
SimpleGraphFromIterator(Edge(trianglemap(r)...) for r in R)
end
add_vertices!(g, n - nv(g))
return g
endTo compare I've used for N in (500, 5000), is_directed in (true, false), p in (10/N, 0.5, 1-10/N)
@btime erdos_renyi($N, $p; is_directed=$is_directed)
endThat gives which seems uniformly better than master: |
|
I went ahead and opened #212 |
fd6960d to
6caf058
Compare
|
I added the implementation of @simonschoelly for It misses some tests. I think a necessary test is to ensure the generated graphs have a coherent structure. For test about assessing the distribution, I still don't know the best way to test for it. |
Great! Note that there are implementations of the various algorithms by Vitter, including this one, in StatsBase. Best. |
|
Thanks, I will take a look. Maybe better to not add a dependency here, the algorithm is sufficiently simple to be contained in Graphs.jl |
|
@gdalle This should be ready for review. I'm unable to reach this last branch for codecov Edit: This is triggered non-deterministically and very rarely, I don't think it makes sense to try to cover it in the tests |
Master :
This commit :