master-thesis/appendix.tex at master · Jovvik/master-thesis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
\chapter{Weighted \v{C}ech complex can be represented as a GDF}
\label{app:weighted_cech}

\begin{lemma}
    For any point cloud $P \subseteq \R^d$, a function $g : P \to \R_{\geq 0}$, and a
    parameter $q \in [1, \infty)$, the weighted \v{C}ech filtration
    $V^a_q[P, g]$ is equal to the sublevel set of the generalised density
    function
    \begin{equation}
        f_{g, q}(P, x) = \min_{p \in P} (d(x, p)^q + g(p)^q)^{1/q}.
    \end{equation}
\end{lemma}

\begin{proof}
    Fix $P, g, q$, and $a$. A point $x \in \R^d$ being included in $V^a_q[P, g]$
    is equivalent to the condition
    \begin{equation}
        \exists p \in P : d(x, p) \leq r_p^{(q)}(a).
    \end{equation}
    Suppose $a \geq g(p)$. Then, we have two cases:
    \begin{enumerate}
        \item $q = \infty$. Then, the condition is equivalent to
            \begin{equation}
                \exists p \in P : d(x, p) \leq a.
            \end{equation}
            The proposed GDF takes value
            \begin{equation}
                f_{g, q}(P, x) = \min_{p \in P} \max(d(x, p), g(p)),
            \end{equation}
            and the sublevel set $f^{-1}_{g, q, P}(-\infty, a]$ is precisely
            \begin{equation}
                \{x \in \R^d \mid \exists p \in P : \max(d(x, p), g(p)) \leq a\},
            \end{equation}
            which, due to $a \geq g(p)$, is the set
            \begin{equation}
                \{x \in \R^d \mid \exists p \in P : d(x, p) \leq a\},
            \end{equation}
            which is the same as $V^a_\infty[P, g]$.
        \item $q < \infty$. Then, the condition is equivalent to
            \begin{equation}
                \exists p \in P : d(x, p) \leq (a^q - g(p)^q)^{1/q}
            \end{equation}
            The proposed GDF takes value
            \begin{equation}
                f_{g, q}(P, x) = \min_{p \in P} (d(x, p)^q + g(p)^q)^{1/q},
            \end{equation}
            and the sublevel set $f^{-1}_{g, q, P}(-\infty, a]$ is precisely
            \begin{equation}
                \{x \in \R^d \mid \exists p \in P : (d(x, p)^q + g(p)^q)^{1/q} \leq a\},
            \end{equation}
            which can be seen to be the same with trivial algebraic manipulations:
            \begin{align}
                (d(x, p)^q + g(p)^q)^{1/q} & \leq a \\
                d(x, p)^q + g(p)^q & \leq a^q \\
                d(x, p)^q & \leq a^q - g(p)^q\\
                d(x, p) & \leq (a^q - g(p)^q)^{1 / q}
            \end{align}
    \end{enumerate}

    Now suppose $a < g(p)$. Then $r_p^{(q)}(a) = - \infty$, and thus the balls
    contain no points. As $(d(x, p)^q + g(p)^q)^{1/q} \geq g(p) > a$, the sublevel
    set is also empty.
\end{proof}

\chapter{The generalised \v{C}ech complex using the Mahalanobis distance kernel is stable}
\label{app:mahalanobis}

\begin{lemma}
    The generalised \v{C}ech complex using the Mahalanobis distance kernel
    \begin{equation}
        h(x, p, P) = \sqrt{(x - p)^\top (\Sigma(p, P) + \varepsilon I)^{-1} (x - p)} = \norm{A(p, P) (x - p)}_2,
    \end{equation}
    where $A(p, P) = (\Sigma(p, P) + \varepsilon I)^{-1/2}$ is stable.
\end{lemma}
\begin{proof}
    To use Theorem~\ref{thm:pc_kernel_stability}, we need to bound
    $|h(x, p, P) - h(x, q, P)|$ and $|h(x, p, P) - h(x, p, Q)|$.
    Bounding the first term, we have:
    \begin{align}
        & |h(x, p, P) - h(x, q, P)| \nonumber \\
        & = |\norm{A(p, P) (x - p)}_2 - \norm{A(q, P) (x - q)}_2| \\
        & \leq \norm{A(p, P) (x - p) - A(q, P) (x - q)}_2 \\
        & = \norm{A(p, P) (x - p) - A(q, P) (x - q) + A(p, P)(x - q) - A(p, P)(x - q)}_2 \\
        & = \norm{A(p, P) (q - p) + (A(p, P) - A(q, P))(x - q)}_2 \\
        & \leq \norm{A(p, P)(q - p)}_2 + \norm{(A(p, P) - A(q, P))(x - q)}_2 \\
        & \leq \norm{A(p, P)}_{\mathrm{op}} \cdot \norm{q - p}_2 + \norm{A(p, P) - A(q, P)}_{\mathrm{op}} \cdot \norm{x - q}_2. \label{eq:bound_h_diff}
    \end{align}
    The operator norm $\norm{A(p, P)}_{\mathrm{op}} = \sup_{v : \norm{v}_2 = 1}
    \norm{A(p, P)v}_2$ is precisely the largest eigenvalue of the matrix $A(p, P)$.
    As $\Sigma(p, P)$ is positive semi-definite, adding it to $\varepsilon I$ cannot
    decrease the eigenvalues, and thus
    $\norm{\Sigma(p, P) + \varepsilon I}_{\mathrm{op}} \geq \varepsilon$, which implies
    \begin{equation}
        \norm{A(p, P)}_{\mathrm{op}} \leq \varepsilon^{-1/2}.
    \end{equation}
    We now bound the second term, $\norm{A(p, P) - A(q, P)}_{\mathrm{op}}$.
    First we bound $\norm{A(p, P) - A(q, P)}_{\mathrm{op}}$ in terms of
    $\norm{\Sigma(p, P) - \Sigma(q, P)}_{\mathrm{op}}$.
    As $(\Sigma(p, P) + \varepsilon I)$ is positive definite for $\varepsilon > 0$,
    we can represent $A = (\Sigma + \varepsilon I)^{-1/2}$ as an integral:
    \begin{equation}
        (\Sigma + \varepsilon I)^{-1/2} = \frac{1}{\pi} \int_0^\infty t^{-1/2} (\Sigma + (\varepsilon + t)I)^{-1} \dd{t}.
    \end{equation}
    Using the identity $A^{-1} - B^{-1} = A^{-1}(B - A)B^{-1}$, we get:
    \begin{align}
        & A(p, P) - A(q, P) \nonumber \\
        & = \frac{1}{\pi} \int_0^\infty t^{-1/2} \frac{\Sigma(p, P) - \Sigma(q, P)}{(\Sigma(p, P) + (\varepsilon + t)I) (\Sigma(q, P) + (\varepsilon + t)I)} \dd{t}.
    \end{align}
    Taking the operator norm and using $\norm{\Sigma + (\varepsilon + t)I}_{\mathrm{op}} \geq \varepsilon + t$, we have:
    \begin{align}
        & ||A(p, P) - A(q, P)||_{\mathrm{op}} \nonumber \\
        & \leq \frac{1}{\pi} \int_0^\infty t^{-1/2} \norm{\Sigma(p, P) - \Sigma(q, P)}_{\mathrm{op}} \cdot (\varepsilon + t)^{-1} \cdot (\varepsilon + t)^{-1} \dd{t} \\
        & = \frac{1}{\pi} \norm{\Sigma(p, P) - \Sigma(q, P)}_{\mathrm{op}} \int_0^\infty t^{-1/2} (\varepsilon + t)^{-2} \dd{t} \\
        & = \norm{\Sigma(p, P) - \Sigma(q, P)}_{\mathrm{op}} \frac{1}{2} \varepsilon^{-3/2}.
    \end{align}
    Now we bound $\norm{\Sigma(p, P) - \Sigma(q, P)}_{\mathrm{op}}$.
    We can split the term $k(\Sigma(p, P) - \Sigma(q, P))$ into three sums:
    \begin{align}
        & k(\Sigma(p, P) - \Sigma(q, P)) \nonumber \\
        & = \sum_{x \in N_k(p, P) \cap N_k(q, P)}[(x - p)(x - p)^\top - (x - q)(x - q)^\top] + \nonumber \\
        & \sum_{x \in N_k(p, P) \setminus N_k(q, P)} (x - p)(x - p)^\top +
        \sum_{x \in N_k(q, P) \setminus N_k(p, P)} (x - q)(x - q)^\top.
    \end{align}
    The first term is bounded by
    \begin{equation}
        \sum_{x \in N_k(p, P) \cap N_k(q, P)} 2\norm{x - p}_2 \norm{p - q}_2 + \norm{p - q}_2^2 \leq k(2R_p \cdot d_H + d_H^2),
    \end{equation}
    where $R_p$ is the radius of the $k$-neighbourhood of $p$ and $d_H = d_H(P, Q)$.

    The other two terms are bounded by $k R_p^2$ and $k R_q^2$, respectively.

    Combining these bounds, we have:
    \begin{equation}
        \norm{\Sigma(p, P) - \Sigma(q, P)}_{\mathrm{op}} \leq 2R_p \cdot d_H + d_H^2 + R_p^2 + R_q^2,
    \end{equation}
    and combining this with the bound of $\norm{A(p, P) - A(q, P)}_{\mathrm{op}}$, we get:
    \begin{equation}
        \norm{A(p, P) - A(q, P)}_{\mathrm{op}} \leq \frac{1}{2} \varepsilon^{-3/2} (2R_p \cdot d_H + d_H^2 + R_p^2 + R_q^2).
    \end{equation}
    Finally, plugging this into
    Equation~\eqref{eq:bound_h_diff}, we have:
    \begin{equation}
        |h(x, p, P) - h(x, q, P)| \leq \varepsilon^{-1/2} d_H + \frac{1}{2} \varepsilon^{-3/2} (2R_p \cdot d_H + d_H^2 + R_p^2 + R_q^2) \cdot R_q.
    \end{equation}

    Via similar reasoning, we can bound $|h(x, p, P) - h(x, p, Q)$:
    \begin{align}
        |h(x, p, P) - h(x, p, Q)| & = |||\norm{A(p, P) (x - p)}_2 - \norm{A(p, Q) (x - p)}_2| \\
        & \leq \norm{A(p, P) (x - p) - A(p, Q) (x - p)}_2 \\
        & = \norm{(A(p, P) - A(p, Q)) (x - p)}_2 \\
        & \leq \norm{A(p, P) - A(p, Q)}_{\mathrm{op}} \cdot \norm{x - p}_2 \\
        & \leq \frac{1}{2} \varepsilon^{-3/2} d_H \cdot (R_p^2 + R_q^2) \cdot R_p.
    \end{align}

    By choosing a sufficiently large $\varepsilon$, we achieve the bounds needed
    by Theorem~\ref{thm:pc_kernel_stability}.
\end{proof}