-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLCCI1726-SparseSimilarity.go
More file actions
97 lines (91 loc) · 3.32 KB
/
LCCI1726-SparseSimilarity.go
File metadata and controls
97 lines (91 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package main
// 面试题 17.26. Sparse Similarity LCCI
// The similarity of two documents (each with distinct words) is defined to be the size of the intersection divided by the size of the union.
// For example, if the documents consist of integers, the similarity of {1, 5, 3} and {1, 7, 2, 3} is 0.4, because the intersection has size 2 and the union has size 5.
// We have a long list of documents (with distinct values and each with an associated ID) where the similarity is believed to be "sparse".
// That is, any two arbitrarily selected documents are very likely to have similarity 0.
// Design an algorithm that returns a list of pairs of document IDs and the associated similarity.
// Input is a 2D array docs, where docs[i] is the document with id i.
// Return an array of strings, where each string represents a pair of documents with similarity greater than 0.
// The string should be formatted as {id1},{id2}: {similarity}, where id1 is the smaller id in the two documents, and similarity is the similarity rounded to four decimal places.
// You can return the array in any order.
// Example:
// Input:
// [
// [14, 15, 100, 9, 3],
// [32, 1, 9, 3, 5],
// [15, 29, 2, 6, 8, 7],
// [7, 10]
// ]
// Output:
// [
// "0,1: 0.2500",
// "0,2: 0.1000",
// "2,3: 0.1429"
// ]
// Note:
// docs.length <= 500
// docs[i].length <= 500
// The number of document pairs with similarity greater than 0 will not exceed 1000.
import "fmt"
func computeSimilarities(docs [][]int) []string {
n := len(docs)
mp := make(map[int][]int)
arr := [][]int{}
for i:=0; i<n; i++ {
x := make([]int, n)
arr = append(arr, x)
}
for i := range docs {
for j := range docs[i] {
v := docs[i][j]
if _, ok := mp[v]; !ok {
mp[v] = []int{ i }
} else {
for _, x := range mp[v] {
arr[x][i]++
}
mp[v] = append(mp[v], i)
}
}
}
res := []string{}
for i := range arr {
for j := i + 1; j < n; j++ {
if arr[i][j] > 0 {
rate := float64(arr[i][j]) / float64(len(docs[i]) + len(docs[j]) - arr[i][j])
// golang的fmt.Sprintf中的小数截取操作是“四舍六入五成双”,1/32 = 0.03125 ,取四位小数是 0.0312, 因为5前面是偶数,后面没有更小位,所以被舍掉,但是题目是根据“四舍五入”算的
// 所以在rate后加入极小浮点数1e-9将这种结果进位,不然过不了13号测试结果
res = append(res, fmt.Sprintf("%d,%d: %.4f", i, j, rate + 1e-9))
}
}
}
return res
}
func main() {
// Example:
// Input:
// [
// [14, 15, 100, 9, 3],
// [32, 1, 9, 3, 5],
// [15, 29, 2, 6, 8, 7],
// [7, 10]
// ]
// Output:
// [
// "0,1: 0.2500",
// "0,2: 0.1000",
// "2,3: 0.1429"
// ]
// Note:
// docs.length <= 500
// docs[i].length <= 500
// The number of document pairs with similarity greater than 0 will not exceed 1000.
grid1 := [][]int{
{14, 15, 100, 9, 3},
{32, 1, 9, 3, 5},
{15, 29, 2, 6, 8, 7},
{7, 10},
}
fmt.Println(computeSimilarities(grid1)) // ["0,1: 0.2500", "0,2: 0.1000", "2,3: 0.1429"]
}