diff --git a/mpb/Makefile.am b/mpb/Makefile.am index 4e097a3f..8e1a3ac8 100644 --- a/mpb/Makefile.am +++ b/mpb/Makefile.am @@ -20,7 +20,7 @@ EXTRA_DIST = mpb.scm.in epsilon.c mu.c nodist_pkgdata_DATA = $(SPECIFICATION_FILE) -MY_SOURCES = medium.c epsilon_file.c field-smob.c fields.c \ +MY_SOURCES = bott.c medium.c epsilon_file.c field-smob.c fields.c \ material_grid.c material_grid_opt.c matrix-smob.c mpb.c field-smob.h matrix-smob.h mpb.h my-smob.h MY_LIBS = $(top_builddir)/src/matrixio/libmatrixio.a $(top_builddir)/src/libmpb@MPB_SUFFIX@.la $(NLOPT_LIB) diff --git a/mpb/bott.c b/mpb/bott.c new file mode 100644 index 00000000..90619cf3 --- /dev/null +++ b/mpb/bott.c @@ -0,0 +1,128 @@ +/* Copyright (C) 1999-2014 Massachusetts Institute of Technology. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +#include "config.h" + +#include +#include +#include +#include + +#include "mpb.h" + +sqmatrix shift_overlap(int band_start, int n_bands, int s1, int s2, int s3) +{ + sqmatrix U, S1, S2; + int final_band = band_start + n_bands, ib; + + CHECK(mdata, "init-params must be called before shift_overlap"); + CHECK(band_start + n_bands <= num_bands, "not enough bands in shift_overlap"); + + U = create_sqmatrix(n_bands); + + /* ...we have to do this in blocks of eigensolver_block_size since + the work matrix W[0] may not have enough space to do it at once. */ + S1 = create_sqmatrix(n_bands); + S2 = create_sqmatrix(n_bands); + + for (ib = band_start; ib < final_band; ib += Hblock.alloc_p) { + if (ib + Hblock.alloc_p > final_band) { + maxwell_set_num_bands(mdata, final_band - ib); + evectmatrix_resize(&Hblock, final_band - ib, 0); + } + maxwell_compute_H_from_shifted_B(mdata, H, Hblock, + (scalar_complex *) mdata->fft_data, + s1, s2, s3, + ib, 0, Hblock.p); + + evectmatrix_XtY_slice2(U, H, Hblock, band_start, 0, n_bands, Hblock.p, + ib-band_start, S1, S2); + } + + /* Reset scratch matrix sizes: */ + evectmatrix_resize(&Hblock, Hblock.alloc_p, 0); + maxwell_set_num_bands(mdata, Hblock.alloc_p); + + destroy_sqmatrix(S2); + destroy_sqmatrix(S1); + return U; +} + +/* complex argument */ +static double scalar_carg(scalar_complex z) +{ + return atan2(CSCALAR_IM(z), CSCALAR_RE(z)); +} + +number_list bott_indices(integer band_start, integer n_bands) +{ + sqmatrix U, W, S1, S2, Un, Wn; + scalar_complex *eigvals; + number_list bott; + int i, n; + + CHECK(mdata, "init-params must be called before shift_overlap"); + CHECK(mdata->nz == 1, "Bott index is not defined (yet) for 3d"); + + U = shift_overlap(band_start-1, n_bands, 0, 1, 0); + W = shift_overlap(band_start-1, n_bands, 1, 0, 0); + + /* allocate scratch arrays */ + S1 = create_sqmatrix(n_bands); + S2 = create_sqmatrix(n_bands); + Un = create_sqmatrix(n_bands); + Wn = create_sqmatrix(n_bands); + CHK_MALLOC(eigvals, scalar_complex, n_bands); + + bott.num_items = n_bands; + CHK_MALLOC(bott.items, number, n_bands); + for (n = 1; n <= n_bands; ++n) { /* compute n-th bott index */ + Un.p = Wn.p = n_bands; /* we change the .p field at every iteration (cf. resizing), but sqmatrix expects */ + /* the .p field to match the size of the matrix being copied; fix that here */ + sqmatrix_copy(Un, U); + sqmatrix_copy(Wn, W); + + sqmatrix_resize(&Un, n, 1); + sqmatrix_resize(&Wn, n, 1); + sqmatrix_resize(&S1, n, 0); /*the .p-field-issue has no impact on the scratch */ + sqmatrix_resize(&S2, n, 0); /*matrices, as their initial value is unimportant */ + + sqmatrix_AeBC(S1, Wn, 0, Un, 0); /*do the matrix products*/ + sqmatrix_AeBC(S2, Wn, 1, Un, 1); + sqmatrix_AeBC(Un, S1, 0, S2, 0); + + sqmatrix_eigenvalues(Un, eigvals); + + bott.items[n-1] = 0; /* initalize/zero (number_list gives random init) */ + for (i = 0; i < n; ++i) + bott.items[n-1] += scalar_carg(eigvals[i]); + bott.items[n-1] /= 6.28318530717958647692528676655900; /* twopi */ + } + + destroy_sqmatrix(Wn); + destroy_sqmatrix(Un); + destroy_sqmatrix(S2); + destroy_sqmatrix(S1); + destroy_sqmatrix(W); + destroy_sqmatrix(U); + free(eigvals); + return bott; +} diff --git a/mpb/fields.c b/mpb/fields.c index 6e93af28..70137132 100644 --- a/mpb/fields.c +++ b/mpb/fields.c @@ -125,14 +125,7 @@ void get_hfield(integer which_band) curfield = (scalar_complex *) mdata->fft_data; curfield_band = which_band; curfield_type = 'h'; - if (mdata->mu_inv == NULL) - maxwell_compute_h_from_H(mdata, H, curfield, which_band - 1, 1); - else { - evectmatrix_resize(&W[0], 1, 0); - maxwell_compute_H_from_B(mdata, H, W[0], curfield, which_band-1,0, 1); - maxwell_compute_h_from_H(mdata, W[0], curfield, 0, 1); - evectmatrix_resize(&W[0], W[0].alloc_p, 0); - } + maxwell_compute_h_from_B(mdata, H, curfield, which_band - 1, 1); /* Divide by the cell volume so that the integral of H*B or of D*E is unity. (From the eigensolver + FFT, they are diff --git a/mpb/mpb.scm.in b/mpb/mpb.scm.in index 31fe1dbb..998f3e2a 100644 --- a/mpb/mpb.scm.in +++ b/mpb/mpb.scm.in @@ -22,7 +22,7 @@ ; this case, we assume that the procedure returns 1 argument, ; as this is the most useful default for our purposes. Sigh. -(define (procedure-num-args p) +(define (procedure-num-args p) (let ((arity (procedure-property p 'arity))) (if arity (car arity) 1))) @@ -76,9 +76,9 @@ (lambda (object) (let ((sz (object-property-value object 'size)) (i (object-property-value object 'matgrid-init))) - (let ((g (apply make-uniform-array + (let ((g (apply make-uniform-array (cons 0.333 (map inexact->exact (vector->list sz)))))) - (array-index-map! g (lambda (x y z) + (array-index-map! g (lambda (x y z) (i (- (/ x (vector3-x sz)) 0.5) (- (/ y (vector3-y sz)) 0.5) (- (/ z (vector3-z sz)) 0.5)))) @@ -165,7 +165,7 @@ (define TE EVEN-Z) (define TM ODD-Z) (define PREV-PARITY -1) -(define-external-function set-parity false false +(define-external-function set-parity false false no-return-value 'integer) (define set-polarization set-parity) ; backwards compatibility @@ -186,12 +186,12 @@ (define-external-function get-epsilon false false no-return-value) (define-external-function get-mu false false no-return-value) (define-external-function fix-field-phase false false no-return-value) -(define-external-function compute-field-energy false false +(define-external-function compute-field-energy false false (make-list-type 'number)) (define-external-function compute-field-divergence false false no-return-value) (define-external-function get-epsilon-point false false 'number 'vector3) -(define-external-function get-epsilon-inverse-tensor-point false false +(define-external-function get-epsilon-inverse-tensor-point false false 'cmatrix3x3 'vector3) (define-external-function get-energy-point false false 'number 'vector3) (define get-scalar-field-point get-energy-point) @@ -210,7 +210,7 @@ 'number (make-list-type 'geometric-object)) (define-external-function output-field-to-file false false - no-return-value 'integer 'string) + no-return-value 'integer 'string) (define-external-function mpi-is-master? false false 'boolean) (define-external-function using-mpi? false false 'boolean) @@ -226,7 +226,7 @@ no-return-value 'integer) (define-external-function sqmatrix-size false false 'integer 'SCM) -(define-external-function sqmatrix-ref false false 'cnumber +(define-external-function sqmatrix-ref false false 'cnumber 'SCM 'integer 'integer) (define-external-function sqmatrix-mult false false 'SCM 'SCM 'SCM) @@ -249,6 +249,8 @@ 'string) (define-external-function load-eigenvectors false false no-return-value 'string) +(define-external-function bott-indices false false (make-list-type 'number) + 'integer 'integer) (define cur-field 'cur-field) (define-external-function cur-field? false false 'boolean 'SCM) @@ -261,19 +263,19 @@ (define-external-function field-set! false false no-return-value 'SCM 'SCM) (define (field-copy f) (let ((f' (field-make f))) (field-set! f' f) f')) (define-external-function field-load false false no-return-value 'SCM) -(define-external-function field-mapL! false false no-return-value 'SCM +(define-external-function field-mapL! false false no-return-value 'SCM 'function (make-list-type 'SCM)) (define (field-map! dest f . src) (apply field-mapL! (list dest f src))) (define-external-function integrate-fieldL false false 'cnumber 'function (make-list-type 'SCM)) (define (integrate-fields f . src) (apply integrate-fieldL (list f src))) -(define-external-function rscalar-field-get-point false false 'number +(define-external-function rscalar-field-get-point false false 'number 'SCM 'vector3) -(define-external-function cscalar-field-get-point false false 'cnumber +(define-external-function cscalar-field-get-point false false 'cnumber 'SCM 'vector3) -(define-external-function cvector-field-get-point false false 'cvector3 +(define-external-function cvector-field-get-point false false 'cvector3 'SCM 'vector3) -(define-external-function cvector-field-get-point-bloch false false 'cvector3 +(define-external-function cvector-field-get-point-bloch false false 'cvector3 'SCM 'vector3) (define-external-function randomize-material-grid! false false @@ -398,7 +400,7 @@ (define (try+ k v) (if (< (n (vector3+ k v)) (n k)) (try+ (vector3+ k v) v) k)) (define (try k v) (try+ (try+ k v) (vector3- (vector3 0) v))) - (define trylist (list + (define trylist (list #(1 0 0) #(0 1 0) #(0 0 1) #(0 1 1) #(1 0 1) #(1 1 0) #(0 1 -1) #(1 0 -1) #(1 -1 0) @@ -483,7 +485,7 @@ (cons (car freqs) k-point) (car br))) (newmax (if (> (car freqs) (cadr br)) (cons (car freqs) k-point) (cdr br)))) - (ubrd br-rest (cdr freqs) + (ubrd br-rest (cdr freqs) (cons (cons newmin newmax) br-start)))))) (ubrd band-range-data freqs '())) @@ -552,7 +554,7 @@ (let ((median-iters (* 0.5 (+ (list-ref sorted-iters (quotient num-runs 2)) (list-ref sorted-iters - (- (quotient + (- (quotient (+ num-runs 1) 2) 1)))))) (print ", median = " median-iters)))) @@ -611,7 +613,7 @@ (let ((k-split (list-split k-points k-split-num k-split-index))) (set-kpoint-index (car k-split)) (if (zero? (car k-split)) - (begin + (begin (output-epsilon) ; output epsilon immediately for 1st k block (if (using-mu?) (output-mu)))) ; and mu too, if we have it (if (> num-bands 0) @@ -620,7 +622,7 @@ (set! current-k k) (begin-time "elapsed time for k point: " (solve-kpoint k)) (set! all-freqs (cons freqs all-freqs)) - (set! band-range-data + (set! band-range-data (update-band-range-data band-range-data freqs k)) (set! eigensolver-iters (append eigensolver-iters @@ -841,7 +843,7 @@ (define korig (if (pair? korig-and-kdir) (car korig-and-kdir) (vector3 0))) (define kdir (if (pair? korig-and-kdir) (cdr korig-and-kdir) korig-and-kdir)) (let ((num-bands-save num-bands) (k-points-save k-points) - (nb (- band-max band-min -1)) + (nb (- band-max band-min -1)) (kdir1 (cartesian->reciprocal (unit-vector3 (reciprocal->cartesian kdir)))) ; k0s is an array caching the best k value found for each band: (k0s (if (list? kmag-guess) (list->vector kmag-guess) @@ -850,7 +852,7 @@ (bktab '())) (define (rootfun b) (lambda (k) (let ((tab-val (assoc (cons b k) bktab))) ; first, look in cached table - (if tab-val + (if tab-val (begin ; use cached result if available (print "find-k " b " at " k ": " (cadr tab-val) " (cached)\n") (cdr tab-val)) @@ -861,7 +863,7 @@ (let ((v (compute-group-velocity-component kdir1))) ; cache computed values: (map (lambda (b f v) - (let ((tabval (assoc + (let ((tabval (assoc (cons b (vector-ref k0s (- b band-min))) bktab))) (if (or (not tabval) @@ -869,7 +871,7 @@ (vector-set! k0s (- b band-min) k))) ; cache k0 (set! bktab (cons (cons (cons b k) (cons (- f omega) v)) bktab))) - (arith-sequence band-min 1 (- b band-min -1)) + (arith-sequence band-min 1 (- b band-min -1)) (ncdr (- band-min 1) freqs) (ncdr (- band-min 1) v)) ; finally return (frequency - omega . derivative): @@ -889,7 +891,7 @@ (run-parity p false (lambda (b') (if (= b' b) - (map (lambda (f) + (map (lambda (f) (apply-band-func-thunk f b true)) band-funcs))))) (arith-sequence band-max -1 nb) (reverse ks))) @@ -898,7 +900,7 @@ (print parity "kvals:, " omega ", " band-min ", " band-max) (vector-map (lambda (k) (print ", " k)) korig) (vector-map (lambda (k) (print ", " k)) kdir1) - (map (lambda (k) (print ", " k)) ks) + (map (lambda (k) (print ", " k)) ks) (print "\n") ks))) @@ -912,7 +914,7 @@ (let ((dots (dot-eigenvectors old-eigs first-band))) (let ((phases (map (lambda (d) (conj (make-polar 1 (angle d)))) (sqmatrix-diag dots)))) - (map (lambda (i phase) + (map (lambda (i phase) (scale-eigenvector i phase) (conj phase)) (arith-sequence first-band 1 (length phases)) phases)))) diff --git a/src/matrices/sqmatrix.c b/src/matrices/sqmatrix.c index 82bb5e01..e02cea63 100644 --- a/src/matrices/sqmatrix.c +++ b/src/matrices/sqmatrix.c @@ -60,7 +60,7 @@ void sqmatrix_assert_hermitian(sqmatrix A) /* A = B */ void sqmatrix_copy(sqmatrix A, sqmatrix B) { - CHECK(A.p == B.p, "arrays not conformant"); + CHECK(A.p == B.p, "arrays not conformant"); blasglue_copy(A.p * A.p, B.data, 1, A.data, 1); } @@ -75,7 +75,7 @@ void sqmatrix_resize(sqmatrix *A, int p, short preserve_data) if (preserve_data) { int i, j; - + if (p < A->p) { for (i = 0; i < p; ++i) for (j = 0; j < p; ++j) @@ -91,7 +91,7 @@ void sqmatrix_resize(sqmatrix *A, int p, short preserve_data) A->p = p; } -/* U contains the upper triangle of a Hermitian matrix; we copy this +/* U contains the upper triangle of a Hermitian matrix; we copy this to F and also fill in the lower triangle with the adjoint of the upper. */ void sqmatrix_copy_upper2full(sqmatrix F, sqmatrix U) { @@ -115,10 +115,10 @@ void sqmatrix_symmetrize(sqmatrix Asym, sqmatrix A) CHECK(Asym.p == A.p, "arrays not conformant"); - for (i = 0; i < A.p; ++i) + for (i = 0; i < A.p; ++i) for (j = 0; j < A.p; ++j) { int ij = i * A.p + j, ji = j * A.p + i; - ASSIGN_SCALAR(Asym.data[ij], + ASSIGN_SCALAR(Asym.data[ij], 0.5 * (SCALAR_RE(A.data[ij]) + SCALAR_RE(A.data[ji])), 0.5 * (SCALAR_IM(A.data[ij]) - @@ -151,7 +151,7 @@ scalar sqmatrix_traceAtB(sqmatrix A, sqmatrix B) return trace; } -/* A = B * C. If bdagger != 0, then adjoint(B) is used; similarly for C. +/* A = B * C. If bdagger != 0, then adjoint(B) is used; similarly for C. A must be distinct from B and C. Note that since the matrices are stored in row-major order, the most efficient operation should be B * adjoint(C), assuming the BLAS is sane. i.e. if C is hermitian, @@ -196,7 +196,7 @@ void sqmatrix_aApbB(real a, sqmatrix A, real b, sqmatrix B) } /* U <- 1/U. U must be Hermitian and, if positive_definite != 0, - positive-definite (e.g. U = Yt*Y). Work is a scratch matrix. + positive-definite (e.g. U = Yt*Y). Work is a scratch matrix. Returns 1 on success, 0 if failure (e.g. matrix singular) */ int sqmatrix_invert(sqmatrix U, short positive_definite, sqmatrix Work) @@ -207,14 +207,14 @@ int sqmatrix_invert(sqmatrix U, short positive_definite, if (positive_definite) { /* factorize U: */ if (!lapackglue_potrf('U', U.p, U.data, U.p)) return 0; - + /* QUESTION: would it be more efficient to stop here, returning the Cholesky factorization of U? This could then be used to multiply by 1/U without ever calculating the inverse explicitly. It would probably be more numerically stable, but how do the computational costs compare? */ - + /* Compute 1/U (upper half only) */ if (!lapackglue_potri('U', U.p, U.data, U.p)) return 0; } @@ -230,10 +230,10 @@ int sqmatrix_invert(sqmatrix U, short positive_definite, /* Compute 1/U (upper half only) */ if (!lapackglue_hetri('U', U.p, U.data, U.p, ipiv, Work.data)) return 0; - + free(ipiv); } - + /* Now, copy the conjugate of the upper half onto the lower half of U */ for (i = 0; i < U.p; ++i) @@ -312,7 +312,7 @@ void sqmatrix_eigenvalues(sqmatrix A, scalar_complex *eigvals) destroy_sqmatrix(B); } -/* Compute Usqrt <- sqrt(U), where U must be Hermitian positive-definite. +/* Compute Usqrt <- sqrt(U), where U must be Hermitian positive-definite. W is a work array, and must be the same size as U. Both U and W are overwritten. */ void sqmatrix_sqrt(sqmatrix Usqrt, sqmatrix U, sqmatrix W) @@ -328,12 +328,12 @@ void sqmatrix_sqrt(sqmatrix Usqrt, sqmatrix U, sqmatrix W) { int i; - + /* Compute W = diag(sqrt(eigenvals)) * U; i.e. the rows of W become the rows of U times sqrt(corresponding eigenvalue) */ for (i = 0; i < U.p; ++i) { CHECK(eigenvals[i] > 0, "non-positive eigenvalue"); - + blasglue_copy(U.p, U.data + i*U.p, 1, W.data + i*U.p, 1); blasglue_rscal(U.p, sqrt(eigenvals[i]), W.data + i*U.p, 1); } diff --git a/src/maxwell/maxwell.h b/src/maxwell/maxwell.h index cf93248f..99456e69 100644 --- a/src/maxwell/maxwell.h +++ b/src/maxwell/maxwell.h @@ -103,7 +103,7 @@ typedef struct { int nplans, plans_howmany[MAX_NPLANS], plans_stride[MAX_NPLANS], plans_dist[MAX_NPLANS]; scalar *fft_data, *fft_data2; - + int zero_k; /* non-zero if k is zero (handled specially) */ k_data *k_plus_G; real *k_plus_G_normsqr; @@ -153,7 +153,7 @@ extern void set_maxwell_mu(maxwell_data *md, maxwell_dielectric_function mu, maxwell_dielectric_mean_function mmu, void *mu_data); - + extern void maxwell_sym_matrix_eigs(real eigs[3], const symmetric_matrix *V); extern void maxwell_sym_matrix_invert(symmetric_matrix *Vinv, const symmetric_matrix *V); @@ -162,7 +162,7 @@ extern void maxwell_sym_matrix_rotate(symmetric_matrix *RAR, double R[3][3]); extern int maxwell_sym_matrix_positive_definite(symmetric_matrix *V); -extern void maxwell_compute_fft(int dir, maxwell_data *d, +extern void maxwell_compute_fft(int dir, maxwell_data *d, scalar *array_in, scalar *array_out, int howmany, int stride, int dist); extern void maxwell_compute_d_from_H(maxwell_data *d, evectmatrix Xin, @@ -171,10 +171,18 @@ extern void maxwell_compute_d_from_H(maxwell_data *d, evectmatrix Xin, extern void maxwell_compute_h_from_H(maxwell_data *d, evectmatrix Hin, scalar_complex *hfield, int cur_band_start, int cur_num_bands); -extern void maxwell_compute_H_from_B(maxwell_data *d, evectmatrix Bin, +extern void maxwell_compute_h_from_B(maxwell_data *d, evectmatrix Bin, + scalar_complex *hfield, + int Bin_band_start, int cur_num_bands); +extern void maxwell_compute_H_from_B(maxwell_data *d, evectmatrix Bin, evectmatrix Hout, scalar_complex *hfield, int Bin_band_start, int Hout_band_start, int cur_num_bands); +extern void maxwell_compute_H_from_shifted_B(maxwell_data *d, evectmatrix Bin, + evectmatrix Hout, scalar_complex *hfield, + int s1, int s2, int s3, + int Bin_band_start, int Hout_band_start, + int cur_num_bands); extern void maxwell_compute_e_from_d(maxwell_data *d, scalar_complex *dfield, int cur_num_bands); @@ -182,9 +190,9 @@ extern void maxwell_compute_e_from_d(maxwell_data *d, extern void maxwell_vectorfield_otherhalf(maxwell_data *d, scalar_complex *field, real phasex,real phasey,real phasez); -extern void maxwell_cscalarfield_otherhalf(maxwell_data *d, +extern void maxwell_cscalarfield_otherhalf(maxwell_data *d, scalar_complex *field, - real phasex, real phasey, + real phasex, real phasey, real phasez); extern void maxwell_scalarfield_otherhalf(maxwell_data *d, real *field); diff --git a/src/maxwell/maxwell_op.c b/src/maxwell/maxwell_op.c index bdb5f640..b4504dba 100644 --- a/src/maxwell/maxwell_op.c +++ b/src/maxwell/maxwell_op.c @@ -21,10 +21,11 @@ #include "imaxwell.h" #include +#include "xyz_loop.h" /**************************************************************************/ -/* assign a = v going from transverse to cartesian coordinates. +/* assign a = v going from transverse to cartesian coordinates. Here, a = (a[0],a[1],a[2]) is in cartesian coordinates. (v[0],v[vstride]) is in the transverse basis of k.m and k.n. */ static void assign_t2c(scalar *a, const k_data k, @@ -57,7 +58,7 @@ static void project_c2t(scalar *v, int vstride, const k_data k, /* assign a = k x v (cross product), going from transverse to cartesian coordinates. - + Here, a = (a[0],a[1],a[2]) and k = (k.kx,k.ky,k.kz) are in cartesian coordinates. (v[0],v[vstride]) is in the transverse basis of k.m and k.n. */ @@ -90,7 +91,7 @@ static void assign_cross_t2c(scalar *a, const k_data k, /* assign v = scale * k x a (cross product), going from cartesian to transverse coordinates. - + Here, a = (a[0],a[1],a[2]) and k = (k.kx,k.ky,k.kz) are in cartesian coordinates. (v[0],v[vstride]) is in the transverse basis of k.m and k.n. */ @@ -165,8 +166,8 @@ static void assign_ucross_t2c(scalar *a, const real u[3], const k_data k, /**************************************************************************/ -void maxwell_compute_fft(int dir, maxwell_data *d, - scalar *array_in, scalar *array_out, +void maxwell_compute_fft(int dir, maxwell_data *d, + scalar *array_in, scalar *array_out, int howmany, int stride, int dist) { #if defined(HAVE_FFTW3) @@ -189,14 +190,14 @@ void maxwell_compute_fft(int dir, maxwell_data *d, # ifdef SCALAR_COMPLEX # ifdef HAVE_MPI CHECK(stride==howmany && dist==1, "bug: unsupported stride/dist"); - plan = FFTW(mpi_plan_many_dft)(3, np, howmany, + plan = FFTW(mpi_plan_many_dft)(3, np, howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, carray_in, carray_out, mpb_comm, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); - iplan = FFTW(mpi_plan_many_dft)(3, np, howmany, + iplan = FFTW(mpi_plan_many_dft)(3, np, howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, carray_in, carray_out, @@ -218,13 +219,13 @@ void maxwell_compute_fft(int dir, maxwell_data *d, nr[rnk-1] = 2*(nr[rnk-1]/2 + 1); # ifdef HAVE_MPI CHECK(stride==howmany && dist==1, "bug: unsupported stride/dist"); - plan = FFTW(mpi_plan_many_dft_c2r)(rnk, np, howmany, + plan = FFTW(mpi_plan_many_dft_c2r)(rnk, np, howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, carray_in, rarray_out, mpb_comm, FFTW_ESTIMATE | FFTW_MPI_TRANSPOSED_IN); - iplan = FFTW(mpi_plan_many_dft_r2c)(rnk, np, howmany, + iplan = FFTW(mpi_plan_many_dft_r2c)(rnk, np, howmany, FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK, rarray_in, carray_out, @@ -246,7 +247,7 @@ void maxwell_compute_fft(int dir, maxwell_data *d, } /* note that the new-array execute functions should be safe - since we only apply maxwell_compute_fft to fftw_malloc'ed data + since we only apply maxwell_compute_fft to fftw_malloc'ed data (so we don't ever have misaligned arrays), and we check above that the strides etc. match */ # ifdef SCALAR_COMPLEX @@ -298,7 +299,7 @@ void maxwell_compute_fft(int dir, maxwell_data *d, CHECK(stride == howmany && dist == 1, "weird strides and dists don't work with fftwnd_mpi"); - + fftwnd_mpi((fftplan) (dir < 0 ? d->plans[0] : d->iplans[0]), howmany, (fftw_complex *) array_in, (fftw_complex *) NULL, @@ -325,7 +326,7 @@ void maxwell_compute_fft(int dir, maxwell_data *d, CHECK(stride == howmany && dist == 1, "weird strides and dists don't work with rfftwnd_mpi"); - + rfftwnd_mpi((fftplan) (dir < 0 ? d->plans[0] : d->iplans[0]), howmany, array_in, (scalar *) NULL, FFTW_TRANSPOSED_ORDER); @@ -390,12 +391,12 @@ void assign_symmatrix_vector(scalar_complex *newv, taking the curl and then Fourier transforming. The output array, dfield, is fft_output_size x cur_num_bands x 3, where fft_output_size is the local spatial indices and 3 is the field - components. + components. Note: actually, this computes just (k+G) x H, whereas the actual D field is i/omega i(k+G) x H...so, we are really computing -omega*D, here. */ -void maxwell_compute_d_from_H(maxwell_data *d, evectmatrix Hin, +void maxwell_compute_d_from_H(maxwell_data *d, evectmatrix Hin, scalar_complex *dfield, int cur_band_start, int cur_num_bands) { @@ -415,12 +416,12 @@ void maxwell_compute_d_from_H(maxwell_data *d, evectmatrix Hin, int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; - + for (b = 0; b < cur_num_bands; ++b) - assign_cross_t2c(&fft_data_in[3 * (ij2*cur_num_bands - + b)], - cur_k, - &Hin.data[ij * 2 * Hin.p + + assign_cross_t2c(&fft_data_in[3 * (ij2*cur_num_bands + + b)], + cur_k, + &Hin.data[ij * 2 * Hin.p + b + cur_band_start], Hin.p); } @@ -450,7 +451,7 @@ void maxwell_compute_e_from_d_(maxwell_data *d, int ib = 3 * (i * cur_num_bands + b); assign_symmatrix_vector(&dfield[ib], eps_inv, &dfield[ib]); } - } + } } void maxwell_compute_e_from_d(maxwell_data *d, scalar_complex *dfield, @@ -462,11 +463,11 @@ void maxwell_compute_e_from_d(maxwell_data *d, /* Compute the magnetic (H) field in Fourier space from the electric field (e) in position space; this amounts to Fourier transforming and then taking the curl. Also, multiply by scale. Other - parameters are as in compute_d_from_H. + parameters are as in compute_d_from_H. Note: we actually compute (k+G) x E, whereas the actual H field is -i/omega i(k+G) x E...so, we are actually computing omega*H, here. */ -void maxwell_compute_H_from_e(maxwell_data *d, evectmatrix Hout, +void maxwell_compute_H_from_e(maxwell_data *d, evectmatrix Hout, scalar_complex *efield, int cur_band_start, int cur_num_bands, real scale) @@ -484,19 +485,19 @@ void maxwell_compute_H_from_e(maxwell_data *d, evectmatrix Hout, /* convert back to Fourier space */ maxwell_compute_fft(-1, d, fft_data, fft_data_out, cur_num_bands*3, cur_num_bands*3, 1); - + /* then, compute Hout = curl(fft_data) (* scale factor): */ - + for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; - + for (b = 0; b < cur_num_bands; ++b) - assign_cross_c2t(&Hout.data[ij * 2 * Hout.p + + assign_cross_c2t(&Hout.data[ij * 2 * Hout.p + b + cur_band_start], - Hout.p, cur_k, + Hout.p, cur_k, &fft_data_out[3 * (ij2*cur_num_bands+b)], scale); } @@ -505,7 +506,7 @@ void maxwell_compute_H_from_e(maxwell_data *d, evectmatrix Hout, /* Compute H field in position space from Hin. Parameters and output formats are the same as for compute_d_from_H, above. */ -void maxwell_compute_h_from_H(maxwell_data *d, evectmatrix Hin, +void maxwell_compute_h_from_H(maxwell_data *d, evectmatrix Hin, scalar_complex *hfield, int cur_band_start, int cur_num_bands) { @@ -519,19 +520,19 @@ void maxwell_compute_h_from_H(maxwell_data *d, evectmatrix Hin, CHECK(cur_band_start >= 0 && cur_band_start + cur_num_bands <= Hin.p, "invalid range of bands for computing fields"); - /* first, compute fft_data = Hin, with the vector field converted + /* first, compute fft_data = Hin, with the vector field converted from transverse to cartesian basis: */ for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; - + for (b = 0; b < cur_num_bands; ++b) - assign_t2c(&fft_data_in[3 * (ij2*cur_num_bands - + b)], + assign_t2c(&fft_data_in[3 * (ij2*cur_num_bands + + b)], cur_k, - &Hin.data[ij * 2 * Hin.p + + &Hin.data[ij * 2 * Hin.p + b + cur_band_start], Hin.p); } @@ -541,31 +542,41 @@ void maxwell_compute_h_from_H(maxwell_data *d, evectmatrix Hin, cur_num_bands*3, cur_num_bands*3, 1); } -void maxwell_compute_H_from_B(maxwell_data *d, evectmatrix Bin, +void maxwell_compute_h_from_B(maxwell_data *d, evectmatrix Bin, + scalar_complex *hfield, + int Bin_band_start, int cur_num_bands) +{ + maxwell_compute_h_from_H(d, Bin, hfield, Bin_band_start, cur_num_bands); + if (d->mu_inv != NULL) + maxwell_compute_e_from_d_(d, hfield, cur_num_bands, d->mu_inv); +} + + + +void maxwell_compute_H_from_B(maxwell_data *d, evectmatrix Bin, evectmatrix Hout, scalar_complex *hfield, - int Bin_band_start, int Hout_band_start, + int Bin_band_start, int Hout_band_start, int cur_num_bands) { scalar *fft_data = (scalar *) hfield; scalar *fft_data_out = d->fft_data2 == d->fft_data ? fft_data : (fft_data == d->fft_data ? d->fft_data2 : d->fft_data); int i, j, b; real scale = 1.0 / Hout.N; /* scale factor to normalize FFTs */ - + if (d->mu_inv == NULL) { - if (Bin.data != Hout.data) - evectmatrix_copy_slice(Hout, Bin, - Hout_band_start, Bin_band_start, - cur_num_bands); - return; + if (Bin.data != Hout.data) + evectmatrix_copy_slice(Hout, Bin, + Hout_band_start, Bin_band_start, + cur_num_bands); + return; } - - maxwell_compute_h_from_H(d, Bin, hfield, Bin_band_start, cur_num_bands); - maxwell_compute_e_from_d_(d, hfield, cur_num_bands, d->mu_inv); - + + maxwell_compute_h_from_B(d, Bin, hfield, Bin_band_start, cur_num_bands); + /* convert back to Fourier space */ maxwell_compute_fft(-1, d, fft_data, fft_data_out, cur_num_bands*3, cur_num_bands*3, 1); - + /* then, compute Hout = (transverse component)(fft_data) * scale factor */ for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { @@ -573,15 +584,96 @@ void maxwell_compute_H_from_B(maxwell_data *d, evectmatrix Bin, int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; for (b = 0; b < cur_num_bands; ++b) - project_c2t(&Hout.data[ij * 2 * Hout.p + + project_c2t(&Hout.data[ij * 2 * Hout.p + b + Hout_band_start], - Hout.p, cur_k, + Hout.p, cur_k, &fft_data_out[3 * (ij2*cur_num_bands+b)], scale); } } +/**************************************************************************/ +/* This function is defined for the purpose of computing the "Bott" index. + It computes the "H" field (projected onto the transverse basis) from + the B field, similar to above, but *also* performs a shift on the B + field in reciprocal space. + + The shift in reciprocal space corresponds to shifting each Fourier + amplitude at a reciprocal vector G to the amplitude at G+s. It + is actually more convenient to perform this shift in position space, + along with the multiplication by mu_inv, by multiplying by a phase + factor exp(isx) via the shift theorem. + + The shift is specified by three integers (s1,s2,s3) indicating the + number of reciprocal lattice vectors to shift by in each direction. +*/ +void maxwell_compute_H_from_shifted_B(maxwell_data *d, evectmatrix Bin, + evectmatrix Hout, scalar_complex *hfield, + int s1, int s2, int s3, + int Bin_band_start, int Hout_band_start, + int cur_num_bands) +{ + scalar *fft_data = (scalar *) hfield; + scalar *fft_data_out = d->fft_data2 == d->fft_data ? fft_data : (fft_data == d->fft_data ? d->fft_data2 : d->fft_data); + int i, j, b; + real scale = 1.0 / Hout.N; /* scale factor to normalize FFTs */ + const double twopi = 6.2831853071795864769252867665590057683943388; + double twopi_1 = s1 * twopi / d->nx; + double twopi_2 = s2 * twopi / d->ny; + double twopi_3 = s3 * twopi / d->nz; + + maxwell_compute_h_from_H(d, Bin, hfield, Bin_band_start, cur_num_bands); + + /* adapted from maxwell_compute_e_from_d_: multiply by mu_inv and shift */ + if (d->mu_inv) { + LOOP_XYZ(d) { + symmetric_matrix mu_inv = d->mu_inv[xyz_index]; + double phase = twopi_1 * i1 + twopi_2 * i2 + twopi_3 * i3; + scalar_complex cphase; + CASSIGN_SCALAR(cphase, cos(phase), sin(phase)); + for (b = 0; b < cur_num_bands; ++b) { + int ib = 3 * (xyz_index * cur_num_bands + b); + assign_symmatrix_vector(&hfield[ib], mu_inv, &hfield[ib]); + CASSIGN_MULT(hfield[ib], hfield[ib], cphase) + CASSIGN_MULT(hfield[ib+1], hfield[ib+1], cphase) + CASSIGN_MULT(hfield[ib+2], hfield[ib+2], cphase) + } + }}} + } + else { + LOOP_XYZ(d) { + double phase = twopi_1 * i1 + twopi_2 * i2 + twopi_3 * i3; + scalar_complex cphase; + CASSIGN_SCALAR(cphase, cos(phase), sin(phase)); + for (b = 0; b < cur_num_bands; ++b) { + int ib = 3 * (xyz_index * cur_num_bands + b); + CASSIGN_MULT(hfield[ib], hfield[ib], cphase) + CASSIGN_MULT(hfield[ib+1], hfield[ib+1], cphase) + CASSIGN_MULT(hfield[ib+2], hfield[ib+2], cphase) + } + }}} + } + + /* convert back to Fourier space */ + maxwell_compute_fft(-1, d, fft_data, fft_data_out, + cur_num_bands*3, cur_num_bands*3, 1); + + /* then, compute Hout = (transverse component)(fft_data) * scale factor */ + for (i = 0; i < d->other_dims; ++i) + for (j = 0; j < d->last_dim; ++j) { + int ij = i * d->last_dim + j; + int ij2 = i * d->last_dim_size + j; + k_data cur_k = d->k_plus_G[ij]; + for (b = 0; b < cur_num_bands; ++b) + project_c2t(&Hout.data[ij * 2 * Hout.p + + b + Hout_band_start], + Hout.p, cur_k, + &fft_data_out[3 * (ij2*cur_num_bands+b)], + scale); + } +} + /**************************************************************************/ /* The following functions take a complex or real vector field @@ -693,7 +785,7 @@ void maxwell_vectorfield_otherhalf(maxwell_data *d, scalar_complex *field, phasey += phasez; CASSIGN_SCALAR(pyz, cos(phasey), sin(phasey)); -/* convenience macros to copy vectors, vectors times phases, +/* convenience macros to copy vectors, vectors times phases, and conjugated vectors: */ # define ASSIGN_V(f,k,f2,k2) { f[3*(k)+0] = f2[3*(k2)+0]; \ f[3*(k)+1] = f2[3*(k2)+1]; \ @@ -836,7 +928,7 @@ void maxwell_vectorfield_otherhalf(maxwell_data *d, scalar_complex *field, #endif /* ! SCALAR_COMPLEX */ } -/* as vectorfield_otherhalf, but operates on a complex scalar field +/* as vectorfield_otherhalf, but operates on a complex scalar field ... ugh, copy & paste job */ void maxwell_cscalarfield_otherhalf(maxwell_data *d, scalar_complex *field, real phasex, real phasey, real phasez) @@ -897,7 +989,7 @@ void maxwell_cscalarfield_otherhalf(maxwell_data *d, scalar_complex *field, phasey += phasez; CASSIGN_SCALAR(pyz, cos(phasey), sin(phasey)); -/* convenience macros to copy cscalars, cscalars times phases, +/* convenience macros to copy cscalars, cscalars times phases, and conjugated cscalars (THIS IS THE ONLY CODE THAT WAS CHANGED COMPARED TO vectorfield_otherhalf): */ # define ASSIGN_V(f,k,f2,k2) { f[k] = f2[k2]; } @@ -1186,7 +1278,7 @@ void maxwell_operator(evectmatrix Xin, evectmatrix Xout, void *data, int cur_band_start; scalar_complex *cdata; real scale; - + CHECK(d, "null maxwell data pointer!"); CHECK(Xin.c == 2, "fields don't have 2 components!"); @@ -1194,11 +1286,11 @@ void maxwell_operator(evectmatrix Xin, evectmatrix Xout, void *data, (void) Work; cdata = (scalar_complex *) d->fft_data; - scale = -1.0 / Xout.N; /* scale factor to normalize FFT; + scale = -1.0 / Xout.N; /* scale factor to normalize FFT; negative sign comes from 2 i's from curls */ /* compute the operator, num_fft_bands at a time: */ - for (cur_band_start = 0; cur_band_start < Xin.p; + for (cur_band_start = 0; cur_band_start < Xin.p; cur_band_start += d->num_fft_bands) { int cur_num_bands = MIN2(d->num_fft_bands, Xin.p - cur_band_start); @@ -1227,17 +1319,17 @@ void maxwell_muinv_operator(evectmatrix Xin, evectmatrix Xout, void *data, maxwell_data *d = (maxwell_data *) data; int cur_band_start; scalar_complex *cdata; - + CHECK(d, "null maxwell data pointer!"); CHECK(Xin.c == 2, "fields don't have 2 components!"); - + (void) is_current_eigenvector; /* unused */ (void) Work; - + cdata = (scalar_complex *) d->fft_data; /* compute the operator, num_fft_bands at a time: */ - for (cur_band_start = 0; cur_band_start < Xout.p; + for (cur_band_start = 0; cur_band_start < Xout.p; cur_band_start += d->num_fft_bands) { int cur_num_bands = MIN2(d->num_fft_bands, Xout.p - cur_band_start); maxwell_compute_H_from_B(d, Xin, Xout, cdata, @@ -1281,7 +1373,7 @@ void maxwell_target_operator(evectmatrix Xin, evectmatrix Xout, void *data, maxwell_target_operator1(Work, Xout, data, is_current_eigenvector, Work); } -/* Compute the operation Xout = curl 1/epsilon * i u x Xin, which +/* Compute the operation Xout = curl 1/epsilon * i u x Xin, which is useful operation in computing the group velocity (derivative of the maxwell operator). u is a vector in cartesian coordinates. */ void maxwell_ucross_op(evectmatrix Xin, evectmatrix Xout, @@ -1306,27 +1398,27 @@ void maxwell_ucross_op(evectmatrix Xin, evectmatrix Xout, for (cur_band_start = 0; cur_band_start < Xin.p; cur_band_start += d->num_fft_bands) { int cur_num_bands = MIN2(d->num_fft_bands, Xin.p - cur_band_start); - + /* first, compute fft_data = u x Xin: */ for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; - + for (b = 0; b < cur_num_bands; ++b) assign_ucross_t2c(&fft_data_in[3 * (ij2*cur_num_bands - + b)], - u, cur_k, - &Xin.data[ij * 2 * Xin.p + + + b)], + u, cur_k, + &Xin.data[ij * 2 * Xin.p + b + cur_band_start], Xin.p); } - + /* now, convert to position space via FFT: */ maxwell_compute_fft(+1, d, fft_data_in, fft_data, cur_num_bands*3, cur_num_bands*3, 1); - + maxwell_compute_e_from_d(d, cdata, cur_num_bands); maxwell_compute_H_from_e(d, Xout, cdata, cur_band_start, cur_num_bands, scale);