Skip to content

Commit 9295179

Browse files
author
Martin D. Weinberg
committed
Use complex array for covariance in Cuda computation; update initialization logic for covar computation
1 parent ae54ded commit 9295179

3 files changed

Lines changed: 58 additions & 67 deletions

File tree

expui/BiorthBasis.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,6 +1571,7 @@ namespace BasisClasses
15711571
if (EVEN_M) sl->setEven(EVEN_M);
15721572
if (pcavar) {
15731573
sl->setSampT(sampT);
1574+
sl->init_covar();
15741575
sl->set_covar(true);
15751576
}
15761577

exputil/EmpCylSL.cc

Lines changed: 56 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <iostream>
44
#include <iomanip>
55
#include <sstream>
6+
#include <cassert>
67
#include <memory>
78
#include <limits>
89
#include <string>
@@ -1923,11 +1924,18 @@ void EmpCylSL::setup_accumulation(int mlevel)
19231924
}
19241925
}
19251926

1926-
if (PCAVAR and sampT>1) {
1927+
if ((covar or PCAVAR) and sampT>1) {
19271928
for (int nth=0; nth<nthrds; nth++) {
19281929
for (unsigned T=0; T<sampT; T++) {
19291930
numbT1[nth][T] = 0;
19301931
massT1[nth][T] = 0.0;
1932+
}
1933+
}
1934+
}
1935+
1936+
if (PCAVAR and sampT>1) {
1937+
for (int nth=0; nth<nthrds; nth++) {
1938+
for (unsigned T=0; T<sampT; T++) {
19311939
covV[nth][T].resize(MMAX+1);
19321940
covM[nth][T].resize(MMAX+1);
19331941
for (int mm=0; mm<=MMAX; mm++) {
@@ -2067,43 +2075,40 @@ void EmpCylSL::init_pca()
20672075

20682076
void EmpCylSL::init_covar()
20692077
{
2070-
if (covar) {
2071-
2072-
if (defSampT) sampT = defSampT;
2073-
else sampT = floor(sqrt(nbodstot));
2074-
2075-
pthread_mutex_init(&used_lock, NULL);
2078+
if (defSampT) sampT = defSampT;
2079+
else sampT = floor(sqrt(nbodstot));
20762080

2077-
MV.resize(nthrds);
2078-
VC.resize(nthrds);
2081+
pthread_mutex_init(&used_lock, NULL);
20792082

2080-
// These are shared with PCA
2081-
numbT1.resize(nthrds);
2082-
massT1.resize(nthrds);
2083-
numbT .resize(sampT, 0);
2084-
massT .resize(sampT, 0);
2083+
MV.resize(nthrds);
2084+
VC.resize(nthrds);
20852085

2086-
for (int nth=0; nth<nthrds;nth++) {
2087-
2088-
MV[nth].resize(sampT);
2089-
VC[nth].resize(sampT);
2090-
2091-
for (unsigned T=0; T<sampT; T++) {
2092-
2093-
MV[nth][T].resize(MMAX + 1);
2094-
VC[nth][T].resize(MMAX + 1);
2086+
// These are shared with PCA
2087+
numbT1.resize(nthrds);
2088+
massT1.resize(nthrds);
2089+
numbT .resize(sampT, 0);
2090+
massT .resize(sampT, 0);
2091+
2092+
for (int nth=0; nth<nthrds;nth++) {
2093+
2094+
MV[nth].resize(sampT);
2095+
VC[nth].resize(sampT);
2096+
2097+
for (unsigned T=0; T<sampT; T++) {
20952098

2096-
for (unsigned mm=0; mm<=MMAX; mm++) {
2097-
MV[nth][T][mm].resize(NORDER, NORDER);
2098-
VC[nth][T][mm].resize(NORDER);
2099-
MV[nth][T][mm].setZero();
2100-
VC[nth][T][mm].setZero();
2101-
}
2099+
MV[nth][T].resize(MMAX + 1);
2100+
VC[nth][T].resize(MMAX + 1);
2101+
2102+
for (unsigned mm=0; mm<=MMAX; mm++) {
2103+
MV[nth][T][mm].resize(NORDER, NORDER);
2104+
VC[nth][T][mm].resize(NORDER);
2105+
MV[nth][T][mm].setZero();
2106+
VC[nth][T][mm].setZero();
21022107
}
2103-
2104-
numbT1[nth].resize(sampT, 0);
2105-
massT1[nth].resize(sampT, 0);
21062108
}
2109+
2110+
numbT1[nth].resize(sampT, 0);
2111+
massT1[nth].resize(sampT, 0);
21072112
}
21082113
}
21092114

@@ -3928,7 +3933,7 @@ void EmpCylSL::accumulate(std::vector<Particle>& part, int mlevel,
39283933
int ncnt=0;
39293934
if (myid==0 && verbose) cout << endl;
39303935

3931-
setup_accumulation();
3936+
setup_accumulation(mlevel);
39323937

39333938
for (auto p=part.begin(); p!=part.end(); p++) {
39343939

@@ -4041,9 +4046,6 @@ void EmpCylSL::accumulate(double r, double z, double phi, double mass,
40414046

40424047
howmany1[mlevel][id]++;
40434048

4044-
double msin, mcos;
4045-
int mm;
4046-
40474049
double norm = -4.0*M_PI;
40484050

40494051
unsigned whch;
@@ -4057,10 +4059,10 @@ void EmpCylSL::accumulate(double r, double z, double phi, double mass,
40574059

40584060
get_pot(vc[id], vs[id], r, z);
40594061

4060-
for (mm=0; mm<=MMAX; mm++) {
4062+
for (int mm=0; mm<=MMAX; mm++) {
40614063

4062-
mcos = cos(phi*mm);
4063-
msin = sin(phi*mm);
4064+
double mcos = cos(phi*mm);
4065+
double msin = sin(phi*mm);
40644066

40654067
for (int nn=0; nn<rank3; nn++) {
40664068
double hold = norm * mass * mcos * vc[id](mm, nn);
@@ -4102,8 +4104,17 @@ void EmpCylSL::accumulate(double r, double z, double phi, double mass,
41024104
}
41034105

41044106
if (compute and covar) {
4105-
Eigen::VectorXcd vec = std::complex<double>(mcos, msin) *
4106-
vc[id].row(mm).transpose() * norm;
4107+
int size = vc[id].row(mm).size();
4108+
assert(size == NORDER && "size of vectors must match");
4109+
Eigen::VectorXcd vec(size);
4110+
4111+
Eigen::VectorXd vC = vc[id].row(mm).transpose() * norm;
4112+
Eigen::VectorXd vS = vs[id].row(mm).transpose() * norm;
4113+
4114+
if (mm==0) vS.setZero();
4115+
4116+
vec.real() = vC*mcos + vS*msin;
4117+
vec.imag() = vC*msin - vS*mcos;
41074118

41084119
VC[id][whch][mm] += mass * vec;
41094120
MV[id][whch][mm] += mass * (vec * vec.adjoint());
@@ -4925,15 +4936,6 @@ void EmpCylSL::pca_hall(bool compute, bool subsamp)
49254936
if (PCAEOF)
49264937
for (auto & v : tvar[nth]) v.setZero();
49274938

4928-
if (covar) {
4929-
for (unsigned T=0; T<sampT; T++) {
4930-
for (unsigned mm=0; mm<=MMAX; mm++) {
4931-
MV[nth][T][mm].setZero();
4932-
VC[nth][T][mm].setZero();
4933-
}
4934-
}
4935-
}
4936-
49374939
if (PCAVAR) {
49384940
for (unsigned T=0; T<sampT; T++) {
49394941
numbT1[nth][T] = 0;
@@ -4965,22 +4967,13 @@ EmpCylSL::getCoefCovariance()
49654967
std::get<0>(ret) = Eigen::Tensor<std::complex<double>, 3>(sampT, MMAX+1, NORDER);
49664968
std::get<1>(ret) = Eigen::Tensor<std::complex<double>, 4>(sampT, MMAX+1, NORDER, NORDER);
49674969

4968-
/*
4969-
for (unsigned T=0; T<sampT; T++) {
4970-
for (int M=0; M<=MMAX; M++) {
4971-
for (int n1=0; n1<NORDER; n1++) {
4972-
std::get<0>(ret)(T, M, n1) = VC[0][T][M](n1);
4973-
for (int n2=0; n2<NORDER; n2++)
4974-
std::get<1>(ret)(T, M, n1, n2) = MV[0][T][M](n1, n2);
4975-
}
4976-
}
4977-
}
4978-
*/
4970+
using Tmap1 = Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 1>>;
4971+
using Tmap2 = Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 2>>;
49794972

49804973
for (unsigned T=0; T<sampT; T++) {
49814974
for (int M=0; M<=MMAX; M++) {
4982-
std::get<0>(ret).chip(T, 0).chip(M, 0) = Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 1>>(VC[0][T][M].data(), NORDER);
4983-
std::get<1>(ret).chip(T, 0).chip(M, 0) = Eigen::TensorMap<Eigen::Tensor<std::complex<double>, 2>>(MV[0][T][M].data(), NORDER, NORDER);
4975+
std::get<0>(ret).chip(T, 0).chip(M, 0) = Tmap1(VC[0][T][M].data(), NORDER);
4976+
std::get<1>(ret).chip(T, 0).chip(M, 0) = Tmap2(MV[0][T][M].data(), NORDER, NORDER);
49844977
}
49854978
}
49864979

include/EmpCylSL.H

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -642,10 +642,7 @@ public:
642642
void init_covar();
643643

644644
//! Turn on/off covariance computation
645-
void set_covar(bool tf) {
646-
covar = tf;
647-
if (tf) init_covar();
648-
}
645+
void set_covar(bool tf) { covar = tf; }
649646

650647
//! Necessary member function currently unused (change design?)
651648
void determine_coefficients() {};

0 commit comments

Comments
 (0)