-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcountEntries.m
More file actions
executable file
·134 lines (106 loc) · 4.24 KB
/
countEntries.m
File metadata and controls
executable file
·134 lines (106 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
function [uniqueEntries,numberOfOccurences,whereIdx] = countEntries(m,isRow, keepNaN)
%COUNTENTRIES returns all unique entries (sorted) in the array m and how many times the respective entries occurred
%
%SYNOPSIS [uniqueEntries,numberOfOccurences,whereIdx] = countEntries(m,isRow,keepNaN)
%
%INPUT  m             : any matrix (not cells or structs)
%       isRow (opt)   : should rows be counted or not [1/{0}]
%                       (if it's cols, transpose m before calling the function!)
%                       If 0, m is treated as the vector m(:).
%       keepNaN (opt) : count NaN as entry? [{1}/0]
%                       isRow = 0: if 1, all NaNs are counted as one entry
%                         that is listed last. If 0, NaNs are ignored.
%                         +Inf and -Inf are always counted as entries.
%                       isRow = 1: if 0, every row containing a non-finite
%                         value (NaN, Inf or -Inf) is removed. If 1, rows
%                         are kept exactly as returned by unique(m,'rows').
%
%OUTPUT uniqueEntries      : sorted unique entries (column vector), or
%                            sorted unique rows if isRow = 1.
%                            If only one output argument is requested,
%                            countEntries returns [uniqueEntries,#ofOcc]
%       numberOfOccurences : column vector; how many times the
%                            corresponding unique entry appears in m
%       whereIdx           : column vector of indices into uniqueEntries,
%                            so that m = uniqueEntries(whereIdx,:).
%                            Elements/rows that were removed because of
%                            keepNaN = 0 have no entry in whereIdx, i.e.
%                            whereIdx then describes only the remaining
%                            elements/rows (in their original order).
%
%
%c: 11/03, jonas
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%---test input
if iscell(m) || isstruct(m)
    error('cells and structs are not supportet as input');
end

if nargin < 2 || isempty(isRow)
    doRow = 0;
else
    if isRow == 1
        doRow = 1;
    elseif isRow == 0
        doRow = 0;
    else
        error('input argument isRow has to be 1 or 0!')
    end
end

if nargin < 3 || isempty(keepNaN)
    keepNaN = true;
end
%---end test input

if ~doRow
    %----- count individual elements

    % make m into a column vector
    m = m(:);

    % NaN ~= NaN, so unique would list every NaN separately. Take the NaNs
    % out first and add them back as a single entry below. +Inf and -Inf
    % need no special treatment: unique sorts them to the ends.
    nanIdx = isnan(m);
    [uniqueEntries,~,valueIdx] = unique(m(~nanIdx));

    % force column orientation (matters for scalar and empty results)
    uniqueEntries = uniqueEntries(:);
    valueIdx = valueIdx(:);

    % count how often every index occurs. accumarray is not called for
    % empty input so that we always get a well-defined 0-by-1 result.
    if isempty(uniqueEntries)
        numberOfOccurences = zeros(0,1);
    else
        numberOfOccurences = accumarray(valueIdx,1,[numel(uniqueEntries),1]);
    end

    % without NaNs (or if NaNs are discarded), whereIdx describes the
    % non-NaN elements of m
    whereIdx = valueIdx;

    % add back NaNs at the end. Concatenate vertically instead of using
    % (end+1)-indexing: the latter turns a scalar into a ROW vector.
    if keepNaN && any(nanIdx)
        uniqueEntries = [uniqueEntries; NaN];
        numberOfOccurences = [numberOfOccurences; sum(nanIdx)];

        % all NaNs point to the last entry, everything else keeps its index
        whereIdx = repmat(numel(uniqueEntries),size(m));
        whereIdx(~nanIdx) = valueIdx;
    end

else
    %----- count rows

    % we do not care about the ordering of the matrix here: if the user
    % specified rows, he/she wanted a columnVector as output (or should
    % read the help)
    [uniqueEntries,~,whereIdx] = unique(m,'rows');
    whereIdx = whereIdx(:);

    if ~keepNaN
        % remove rows containing NaN, Inf or -Inf
        badRow = any(~isfinite(uniqueEntries),2);
        if any(badRow)
            % Removing rows shifts the position of all later rows, so the
            % indices have to be renumbered: newIdx maps old row number to
            % new row number, with 0 marking removed rows.
            newIdx = cumsum(~badRow);
            newIdx(badRow) = 0;

            uniqueEntries(badRow,:) = [];
            whereIdx = newIdx(whereIdx);
            whereIdx = whereIdx(:);
            whereIdx(whereIdx == 0) = [];
        end
    end

    % count how often every row index occurs
    % example m: [11,11,22,33,33,22,22,22,44,11]'
    % uniqueEntries / whereIdx: [11,22,33,44]' / [1 1 2 3 3 2 2 2 4 1]'
    % numberOfOccurences: [3 4 2 1]'
    nUnique = size(uniqueEntries,1);
    if nUnique == 0
        numberOfOccurences = zeros(0,1);
    else
        numberOfOccurences = accumarray(whereIdx,1,[nUnique,1]);
    end
end

% with fewer than two outputs, return entries and counts as one table
if nargout < 2
    uniqueEntries = [uniqueEntries,numberOfOccurences];
end