fast_sym_4D.cxx 4.94 KB
Newer Older
1
/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
2 3 4 5 6 7
/** \addtogroup examples 
  * @{ 
  * \defgroup fast_sym_4D
  * @{ 
  * \brief A clever way to multiply symmetric matrices of nonsymmetric matricers
  */
8 9 10 11 12 13 14 15 16 17 18

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <math.h>
#include <assert.h>
#include <vector>
#include <algorithm>
#include <ctf.hpp>

19

20 21
int fast_sym_4D(int const     n,
                CTF_World    &ctf){
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
  int rank, i, num_pes;
  
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &num_pes);

  int len3[] = {n,n,n};
  int len4[] = {n,n,n,n};
  int len5[] = {n,n,n,n,n};
  int NNN[] = {NS,NS,NS};
  int HNNN[] = {SH,NS,NS,NS};
  int YYNNN[] = {SY,SY,NS,NS,NS};

  CTF_Tensor A(4, len4, HNNN, ctf);
  CTF_Tensor B(4, len4, HNNN, ctf);
  CTF_Tensor C(4, len4, HNNN, ctf);
  CTF_Tensor C_ans(4, len4, HNNN, ctf);
  
  CTF_Tensor A_rep(5, len5, YYNNN, ctf);
  CTF_Tensor B_rep(5, len5, YYNNN, ctf);
  CTF_Tensor Z(5, len5, YYNNN, ctf);
  CTF_Tensor As(3, len3, NNN, ctf);
  CTF_Tensor Bs(3, len3, NNN, ctf);
  CTF_Tensor Cs(3, len3, NNN, ctf);

  {
solomon's avatar
solomon committed
47
    int64_t * indices;
48
    double * values;
solomon's avatar
solomon committed
49
    int64_t size;
50 51
    srand48(173*rank);

52
    A.read_local(&size, &indices, &values);
53 54 55
    for (i=0; i<size; i++){
      values[i] = drand48();
    }
56
    A.write(size, indices, values);
57 58
    free(indices);
    free(values);
59
    B.read_local(&size, &indices, &values);
60 61 62
    for (i=0; i<size; i++){
      values[i] = drand48();
    }
63
    B.write(size, indices, values);
64 65 66
    free(indices);
    free(values);
  }
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84

  
/*  std::cout << "start C =As*B"<< "\n";
  C["ijab"] = As["ial"]*B["ijlb"];
  C.print(stdout, .000000001);
  int64_t sz;
  double const * data = C.get_raw_data(&sz);
  //for (int64_t i=0; i<n*(n-1)*(n-2); i++){
  for (int64_t i=0; i<sz; i++){
    if (std::abs(data[i] ) > .0000000001)
      printf("C[%ld] = %lf\n", i, data[i]);
  }
  std::cout << "C norm " << C.norm2() << "\n";
  C["ijab"]=0.;
  assert(0);
*/


85 86
  C_ans["ijab"] = A["ikal"]*B["kjlb"];

87 88 89 90 91 92 93 94 95 96
#ifdef USE_SYM_SUM
  A_rep["ijkal"] += A["ijal"];
  B_rep["ijklb"] += B["ijlb"];
  Z["ijkab"] += A_rep["ijkal"]*B_rep["ijklb"];
  C["ijab"] += Z["ijkab"];
  Cs["iab"] += A["ikal"]*B["iklb"];
  As["ial"] += A["ikal"];
  Bs["ilb"] += B["iklb"];
  C["ijab"] -= ((double)n)*A["ijal"]*B["ijlb"];
  C["ijab"] -= Cs["iab"];
97 98
  //std::cout << "C norm " << C.norm2() << "\n";
  //C.print();
99
  C["ijab"] -= As["ial"]*B["ijlb"];
100
  //C["ijab"] -= A["ijal"]*Bs["jlb"];
101
  C["ijab"] -= A["ijal"]*Bs["jlb"];
102 103 104 105 106 107 108 109
  /*int64_t sz;
  double const * data = C.get_raw_data(&sz);
  //for (int64_t i=0; i<n*(n-1)*(n-2); i++){
  for (int64_t i=0; i<sz; i++){
    printf("C[%ld] = %lf\n", i, data[i]);
  }
  std::cout << "after C norm " << C.norm2() << "\n";
  std::cout << "after C norm1 " << C.norm1() << "\n";*/
110
#else
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
  A_rep["ijkal"] += A["ijal"];
  A_rep["ijkal"] += A["ikal"];
  A_rep["ijkal"] += A["jkal"];
  B_rep["ijklb"] += B["ijlb"];
  B_rep["ijklb"] += B["iklb"];
  B_rep["ijklb"] += B["jklb"];
  Z["ijkab"] += A_rep["ijkal"]*B_rep["ijklb"];
  C["ijab"] += Z["ijkab"];
  C["ijab"] += Z["ikjab"];
  C["ijab"] += Z["kijab"];
  C["ijab"] -= Z["ijjab"];
  C["ijab"] -= Z["iijab"];
  Cs["iab"] += A["ikal"]*B["iklb"];
  As["ial"] += A["ikal"];
  As["ial"] += A["kial"];
  Bs["ilb"] += B["iklb"];
  Bs["ilb"] += B["kilb"];
  C["ijab"] -= ((double)n)*A["ijal"]*B["ijlb"];
  C["ijab"] -= Cs["iab"];
  C["ijab"] -= Cs["jab"];
  C["ijab"] -= As["ial"]*B["ijlb"];
  C["ijab"] -= A["ijal"]*Bs["jlb"];
133
#endif
134 135 136 137 138 139 140 141 142 143 144

  if (n<4){
    printf("A:\n");
    A.print();
    printf("B:\n");
    B.print();
    printf("C_ans:\n");
    C_ans.print();
    printf("C:\n");
    C.print();
  }
145
  CTF_Tensor Diff(4, len4, HNNN, ctf);
146
  Diff["ijab"] = C["ijab"]-C_ans["ijab"];
147
  double nrm = Diff.norm2();
148
  int pass = (nrm <=1.E-10);
149 150 151
  
  if (rank == 0){
    MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
152 153
    if (pass) printf("{ C[\"(ij)ab\"] = A[\"(ik)al\"]*B[\"(kj)lb\"] } passed\n");
    else      printf("{ C[\"(ij)ab\"] = A[\"(ik)al\"]*B[\"(kj)lb\"] } failed\n");
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
  } else 
    MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
  return pass;
}

#ifndef TEST_SUITE
char* getCmdOption(char ** begin,
                   char ** end,
                   const   std::string & option){
  char ** itr = std::find(begin, end, option);
  if (itr != end && ++itr != end){
    return *itr;
  }
  return 0;
}


int main(int argc, char ** argv){
  int rank, np, n;
  int const in_num = argc;
  char ** input_str = argv;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &np);

  if (getCmdOption(input_str, input_str+in_num, "-n")){
    n = atoi(getCmdOption(input_str, input_str+in_num, "-n"));
182 183
    if (n < 0) n = 6;
  } else n = 6;
184 185 186 187 188 189

  {
    CTF_World dw(MPI_COMM_WORLD, argc, argv);
    if (rank == 0){
      printf("Computing C_(ij)ab = A_(ik)al*B_(kj)lb\n");
    }
190
    int pass = fast_sym_4D(n, dw);
191 192 193 194 195 196
    assert(pass);
  }
  
  MPI_Finalize();
  return 0;
}
197 198 199 200 201
/**
 * @} 
 * @}
 */

202 203
#endif