-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproj-degreeDistros.py
More file actions
93 lines (71 loc) · 2.41 KB
/
proj-degreeDistros.py
File metadata and controls
93 lines (71 loc) · 2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Finds the in-degree, out-degree, and reciprocal-degree distributions
# of a directed graph given as an edge list.
# SNAP folds reciprocal degrees into its in/out degree calculations,
# which is less than ideal for this project.
# First argument: input file name (one edge per line).
# Second argument: output file base name; results are written to
# indeg_<base>, outdeg_<base>, and recipdeg_<base>.
# ONLY TESTED FOR PYTHON 3.3
from scipy import sparse
import numpy
import sys
def convertListToDistro(degreeList):
    """Convert a list of per-node degrees into a degree distribution.

    Args:
        degreeList: iterable of hashable degree values (the script passes
            the row sums of a degree matrix, one entry per row).

    Returns:
        A dict mapping each distinct degree value to the number of times
        it occurs in degreeList. An empty input yields an empty dict.
    """
    distro = {}
    for value in degreeList:
        # dict.get supplies the initial count of 0 for unseen degrees,
        # so no exception handling (and no bare ``except:``) is needed.
        distro[value] = distro.get(value, 0) + 1
    return distro
def outputToFile(distroDict, outputFile):
    """Write a degree distribution to an already-open writable file.

    One line is written per entry, in ascending key order. Each line holds
    the key converted to an int, a single space, the stored count, and a
    trailing newline.

    Args:
        distroDict: mapping of degree value -> count.
        outputFile: object with a ``write`` method; it is not closed here.
    """
    ordered_degrees = sorted(distroDict)
    for degree in ordered_degrees:
        # int() drops the fractional part of float keys (e.g. 2.0 -> "2")
        fields = (str(int(degree)), str(distroDict[degree]))
        outputFile.write(' '.join(fields) + '\n')
# Script body: read the edge list named by the first command-line argument,
# build in-, out-, and reciprocal-degree matrices, and write the three
# degree distributions to files named after the second argument.

# fix nodes and edges for graph being read in
# (node ids are used directly as matrix indices, so each id must be <= NODES)
NODES = 81306
EDGES = 1768149  # not referenced by the code below

# declare sparse matrices; row i of each matrix holds the neighbours that
# contribute to node i's in-, out-, or reciprocal degree respectively
inDeg = sparse.lil_matrix((NODES+1, NODES+1))
outDeg = sparse.lil_matrix((NODES+1, NODES+1))
recipDeg = sparse.lil_matrix((NODES+1, NODES+1))

# ``with`` closes the input file even if a line fails to parse
with open(sys.argv[1], "r") as File:
    for line in File:
        # split on any run of whitespace so tabs, repeated spaces and the
        # trailing newline are all tolerated
        temp = line.split()
        if not temp:
            continue  # skip blank lines
        edge1 = int(temp[0])  # follower
        edge2 = int(temp[1])  # followed
        # update in degree matrix
        inDeg[edge2, edge1] = 1
        # update out degree matrix
        outDeg[edge1, edge2] = 1
        # the reverse edge was seen earlier -> the pair is reciprocal
        if inDeg[edge1, edge2] == 1:
            recipDeg[edge1, edge2] = 1
            recipDeg[edge2, edge1] = 1
        # ensure we are not double counting for all reciprocal edges;
        # checked on every line so a repeated edge of an already
        # reciprocal pair is cleared again as well
        if recipDeg[edge1, edge2] == 1:
            inDeg[edge1, edge2] = 0
            outDeg[edge2, edge1] = 0
            inDeg[edge2, edge1] = 0
            outDeg[edge1, edge2] = 0

# get row sums of each matrix into flat lists (one degree per matrix row)
inDegList = numpy.array(inDeg.sum(1)).reshape(-1,).tolist()
outDegList = numpy.array(outDeg.sum(1)).reshape(-1,).tolist()
recipDegList = numpy.array(recipDeg.sum(1)).reshape(-1,).tolist()

# convert lists to proper degree distributions
inDegDistro = convertListToDistro(inDegList)
outDegDistro = convertListToDistro(outDegList)
recipDegDistro = convertListToDistro(recipDegList)

# output degree distributions to three files; each file is closed as soon
# as it has been written, even if writing raises
with open('indeg_'+sys.argv[2], 'w') as inDegFile:
    outputToFile(inDegDistro, inDegFile)
with open('outdeg_'+sys.argv[2], 'w') as outDegFile:
    outputToFile(outDegDistro, outDegFile)
with open('recipdeg_'+sys.argv[2], 'w') as recipDegFile:
    outputToFile(recipDegDistro, recipDegFile)