-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathmessage.go
More file actions
122 lines (97 loc) · 3.53 KB
/
message.go
File metadata and controls
122 lines (97 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package gosms
import (
"errors"
"unicode"
)
// ErrNotSplittable is returned by SplitMessage when a single character needs
// more code points than messageLength allows, so no placement of split points
// can make the message fit with the given encoder and message length.
var ErrNotSplittable = errors.New("the message cannot be split with the given encoder and message length")
// willMessageFit reports whether message, measured with encoder, occupies at
// most messageLength code points and can therefore be sent without splitting.
//
// It returns (false, ErrNotEncodable) as soon as the encoder rejects a
// character, and (false, nil) as soon as the running total exceeds
// messageLength; characters after that point are not examined.
func willMessageFit(message []rune, encoder Encoder, messageLength int) (bool, error) {
	used := 0
	for i := range message {
		// The width is looked up per character because some encodings use a
		// variable number of code points per character.
		width, err := encoder.GetCodePoints(message[i])
		if err != nil {
			return false, ErrNotEncodable
		}
		if used += width; used > messageLength {
			return false, nil
		}
	}
	return true, nil
}
// canSplitBefore reports whether a message may be broken immediately before
// char.
//
// Only whitespace and control characters qualify. Letters, digits, marks
// (accents that belong to the preceding letter) and symbols (including emojis)
// are kept attached to what precedes them. Punctuation is excluded as well, so
// that it stays appended to the word or number it follows.
func canSplitBefore(char rune) bool {
	if unicode.IsSpace(char) {
		return true
	}
	return unicode.IsControl(char)
}
// canSplitAfter reports whether a message may be broken immediately after
// char.
//
// Whitespace, control characters and punctuation qualify: none of them is part
// of a word or number, so whatever follows can start a new part. Letters,
// digits, marks (accents) and symbols (including emojis) do not qualify.
func canSplitAfter(char rune) bool {
	switch {
	case unicode.IsPunct(char), unicode.IsSpace(char), unicode.IsControl(char):
		return true
	default:
		return false
	}
}
// SplitMessage cuts message into consecutive parts, each costing at most
// messageLength code points as measured by encoder. Concatenating the parts
// reproduces the message.
//
// A part is ended at the most recent position accepted by canSplitBefore or
// canSplitAfter, so words are kept whole where possible; when the current part
// contains no such position it is cut right where it became full. An empty
// message yields a single empty part.
//
// It returns ErrNotEncodable if the encoder rejects a character, and
// ErrNotSplittable if a single character on its own exceeds messageLength.
func SplitMessage(message []rune, encoder Encoder, messageLength int) ([]string, error) {
	var parts []string

	start := 0    // index in message of the first rune of the current part
	used := 0     // code points consumed by the current part so far
	breakAt := -1 // index in message of the latest safe split, -1 when none

	for pos := 0; pos < len(message); {
		r := message[pos]

		// Some encodings spend a variable number of code points per character.
		width, err := encoder.GetCodePoints(r)
		if err != nil {
			return nil, ErrNotEncodable
		}

		if canSplitBefore(r) {
			breakAt = pos
		}

		if used+width > messageLength {
			// Nothing collected yet: this rune alone does not fit.
			if pos == start {
				return nil, ErrNotSplittable
			}
			// No safe boundary inside this part, so cut where it became full.
			if breakAt == -1 {
				breakAt = pos
			}
			parts = append(parts, string(message[start:breakAt]))

			// Resume scanning at the cut so the runs past it are re-measured
			// as the beginning of the next part.
			start, pos = breakAt, breakAt
			used, breakAt = 0, -1
			continue
		}

		// The rune fits: take it into the current part.
		used += width
		pos++
		if canSplitAfter(r) {
			breakAt = pos
		}
	}

	// Whatever remains forms the final part.
	parts = append(parts, string(message[start:]))
	return parts, nil
}