-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathipsum.m
More file actions
executable file
·154 lines (129 loc) · 4.26 KB
/
ipsum.m
File metadata and controls
executable file
·154 lines (129 loc) · 4.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
function[t] = ipsum(varargin)
%ipsum generate random filler text
%
%USAGE:
% t = ipsum('Property1', Value1, 'Property2', Value2, ..., 'PropertyN', ValueN);
%
%INPUTS:
% An (optional) sequence of Property-value pairs:
%
% + Paragraphs: The number of paragraphs to generate. (Default: 3)
% + Sentences: The mean number of sentences to write per paragraph.
% (Default: 4)
% + SentencesStd: Standard deviation of number of sentences/paragraph.
% (Default: 1)
% + Words: The mean number of words per sentence. (Default: 8)
% + WordsStd: Standard deviation of number of words/sentence.
% (Default: 2)
% + WordLength: The mean word length (number of characters).
% (Default: 6)
% + WordLengthStd: Standard deviation of word length. (Default: 2)
% + VowelsProp: Proportion of vowels per word. (Default: 0.5)
% + PunctuationProp: Proportion of words followed by punctuation.
% (Default: 0.05)
%
% Paragraphs is rounded to the nearest integer. Sentence and word counts
% are drawn from a normal distribution, rounded, and clipped to at least 1.
%
%OUTPUTS:
%
% t: A formatted text string. Paragraphs are separated by the literal
% escape sequence '\n\n' (a backslash followed by 'n', not newline
% characters), so pass t as the format argument of FPRINTF or SPRINTF
% to turn the separators into blank lines.
%
%EXAMPLE USAGE:
%
% %Save output in filler.txt in the current working directory
% fid = fopen('filler.txt', 'w+');
% fprintf(fid, ipsum('Paragraphs',20,'Sentences',5));
% fclose(fid);
%
%SEE ALSO: FPRINTF
% 9/9/13 JRM Wrote it.
% 9/12/13 JRM Select characters according to English letter frequency
%#ok<*AGROW>
end_punctuation = ['.' '!' '?'];
end_punctuation_props = [0.9 0.05 0.05];
punctuation = [',' ';' ':'];
punctuation_props = [0.8 0.1 0.1];
sentence_sep = ' ';        %appended after every sentence
paragraph_sep = '\n\n';    %literal escape sequence, rendered by fprintf

args = parse_args(varargin{:});
numParagraphs = round(args.Paragraphs);

t = '';   %char (not []) so that an empty result is still a string
for i = 1:numParagraphs
    numSentences = max(round(args.SentencesStd*randn + args.Sentences), 1);
    for j = 1:numSentences
        numWords = max(round(args.WordsStd*randn + args.Words), 1);
        t = [t, capitalize(generate_word(args))];
        for k = 2:numWords
            %mid-sentence punctuation attaches to the previous word; the
            %k < numWords test keeps it off the word right before the last
            if (rand < args.PunctuationProp) && (k < numWords)
                t = [t, slctrnd(punctuation, punctuation_props)];
            end
            t = [t, ' ', generate_word(args)];
        end
        t = [t, slctrnd(end_punctuation, end_punctuation_props), sentence_sep];
    end
    if i == numParagraphs
        %drop only the trailing separator; removing more would also strip
        %the end punctuation of the final sentence
        t = t(1:end-length(sentence_sep));
    else
        t = [t, paragraph_sep];
    end
end
function[s] = capitalize(s)
%CAPITALIZE Upper-case the first character of s; the rest is unchanged.
% Empty input is returned untouched (indexing s(1) would raise an error).
if ~isempty(s)
    s(1) = upper(s(1));
end
function[w] = generate_word(a)
%GENERATE_WORD Build one random lowercase pseudo-word.
% a: options struct; reads a.WordLength, a.WordLengthStd and a.VowelsProp.
% w: char row vector. Its length is a rounded normal draw, at least 1.
% A one-letter word is a uniformly chosen vowel; otherwise each
% position is a vowel with probability a.VowelsProp (else a
% consonant), the letter being weighted by English letter frequency.
alphabet = 'a':'z';
vowels = 'aeiou';
vowel_mask = ismember(alphabet, vowels);
consonants = alphabet(~vowel_mask);

%source: http://en.wikipedia.org/wiki/Letter_frequency
freqs = [8.1670 1.4920 2.7820 4.2530 12.7020 2.2280 2.0150 6.0940 6.9660...
    0.1530 0.7720 4.0250 2.4060 6.7490 7.5070 1.9290 0.0950 5.9870...
    6.3270 9.0560 2.7580 0.9780 2.3600 0.1500 1.9740 0.0740];
vowel_freqs = freqs(vowel_mask);
cons_freqs = freqs(~vowel_mask);

n_chars = max(round(a.WordLengthStd*randn + a.WordLength), 1);

%single-letter words are always a vowel, picked uniformly
if n_chars <= 1
    w = slctrnd(vowels);
    return;
end

w = repmat(' ', 1, n_chars);
for pos = 1:n_chars
    if rand < a.VowelsProp
        w(pos) = slctrnd(vowels, vowel_freqs);
    else
        w(pos) = slctrnd(consonants, cons_freqs);
    end
end
function[r] = slctrnd(list, props)
%SLCTRND Pick one random element of list.
% list: array to choose from.
% props: (optional) relative weight of each element of list; when it is
% omitted every element gets the same weight.
% r: the chosen element, list(k), with k drawn by sample_from_dist.
if nargin < 2
    props = repmat(1/length(list), size(list));
end
r = list(sample_from_dist(props));
function[a] = parse_args(varargin)
%PARSE_ARGS Merge Property/Value pairs into the default options struct.
% varargin: 'Name', value, 'Name', value, ... where each Name is a field
% of the struct returned by get_defaults.
% a: options struct with the supplied values overriding the defaults.
% Raises an error when the argument count is odd, a name is unknown, or a
% resulting value is not a real, finite, positive numeric scalar.
assert((rem(length(varargin),2) == 0),'Arguments must be given in Property-Value pairs.');
a = get_defaults;
for i = 1:2:length(varargin)
    nextField = varargin{i};
    assert(isfield(a, nextField), 'Unknown Property: ''%s''', nextField);
    a.(nextField) = varargin{i + 1};
end
fnames = fieldnames(a);
for i = 1:length(fnames)
    value = a.(fnames{i});
    %reject chars/cells/logicals: e.g. '5' would pass the > 0 test below
    %and later be used as its character code
    assert(isnumeric(value) && isreal(value), '''%s'' must be numeric.', fnames{i});
    assert(length(value) == 1, '''%s'' must be scalar.', fnames{i});
    %Inf would make the generation loops unbounded
    assert(isfinite(value), '''%s'' must be finite.', fnames{i});
    assert(value > 0, '''%s'' must be positive.', fnames{i});
end
function[da] = get_defaults()
%GET_DEFAULTS Return the default option values as a scalar struct.
% Field order is significant to callers that iterate over fieldnames.
da = struct( ...
    'Paragraphs',      3, ...
    'Sentences',       4, ...
    'SentencesStd',    1, ...
    'Words',           8, ...
    'WordsStd',        2, ...
    'WordLength',      6, ...
    'WordLengthStd',   2, ...
    'VowelsProp',      0.5, ...
    'PunctuationProp', 0.05);
function[y,bins] = sample_from_dist(ps)
%SAMPLE_FROM_DIST Draw one index from a discrete distribution.
% ps: non-empty array of finite, non-negative weights with a positive
% sum. It is flattened and normalized here, so it need not sum to 1
% and may be a row or a column.
% y: sampled index k in 1..numel(ps), drawn with probability
% ps(k)/sum(ps).
% bins: same value as y (second output kept for existing callers).
% Raises an error when ps is not a valid weight vector.
assert((isnumeric(ps) || islogical(ps)) && ~isempty(ps), ...
    'ps must be a non-empty numeric array of weights.');
ps = double(ps(:)).';
assert(all(isfinite(ps)) && all(ps >= 0) && (sum(ps) > 0), ...
    'ps must contain finite, non-negative weights with a positive sum.');

%cumulative distribution over the indices
cdf = cumsum(ps./sum(ps));

%round-off can leave the top of the cdf slightly below 1; clamping from the
%last positive weight onward guarantees a hit without ever letting a
%trailing zero-weight index be selected
last_positive = find(ps > 0, 1, 'last');
cdf(last_positive:end) = 1;

%rand lies strictly inside (0,1), so a first index with rand < cdf exists
bins = find(rand < cdf, 1, 'first');
y = bins;