Fixed the symmetric-summation (sym_sum_tsr) bug that Devin reported and added a regression test for it (diag_sym)

dc9b620c · Edgar Solomonik · 377d1b9b · dc9b620c · dc9b620c · dc9b620c
Commit dc9b620c authored 11 years ago by Edgar Solomonik
6 changed files
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -3,7 +3,10 @@ DEPS += $(addprefix $(DEPDIR)/,$(notdir $(patsubst %.o,%.Po,$(wildcard *.o))))
 include ../src/make/make.in
 include ../src/make/rules.mk

-examples: dft dft_3D gemm gemm_4D trace sym3 fast_sym fast_sym_4D ccsdt_t3_to_t2 weight_4D test_suite strassen
+examples: dft dft_3D gemm gemm_4D trace diag_sym sym3 fast_sym fast_sym_4D ccsdt_t3_to_t2 weight_4D test_suite strassen
+
+diag_sym: ${bindir}/diag_sym
+${bindir}/diag_sym: diag_sym.o ${libdir}/libctf.a

 ccsdt_t3_to_t2: ${bindir}/ccsdt_t3_to_t2
 ${bindir}/ccsdt_t3_to_t2: ccsdt_t3_to_t2.o ${libdir}/libctf.a

--- a/examples/diag_sym.cxx
+++ b/examples/diag_sym.cxx
+/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+#include <math.h>
+#include <assert.h>
+#include <algorithm>
+#include <ctf.hpp>
+#include "../src/shared/util.h"
+/**
+ * Consistency test for summing matrix diagonals into a 4D tensor.
+ *
+ * Builds two n-by-n matrices mA and mB, overwrites the entries obtained from
+ * get_local_data with drand48()-.5, then forms
+ *   A["abij"] = mA["ii"] - mB["aa"]   and   B["abij"] = mA["jj"] - mB["bb"]
+ * on tensors declared with shape {SY,NS,SY,NS}. The test passes when the
+ * CTF_OP_SQNRM2 reduction of C = A - B is below 1.E-6.
+ *
+ * @param n  extent of every tensor and matrix dimension
+ * @param dw CTF world on which the tensors and matrices are created
+ * @return 1 if the check passes, 0 otherwise; rank 0 also prints the outcome
+ */
+int diag_sym(int const    n,
+             CTF_World   &dw){
+  int rank, i, num_pes, pass; // NOTE(review): num_pes is only written by MPI_Comm_size and never read
+  int64_t np; // NOTE(review): compared against the int counter i in the fill loops below
+  double * pairs;
+  int64_t * indices;
+  
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
+
+
+  int shapeN4[] = {SY,NS,SY,NS}; // presumably SY ties an index to the one after it (a~b, i~j) -- TODO confirm
+  int sizeN4[] = {n,n,n,n}; // all four dimensions have extent n
+
+  //* Creates distributed tensors initialized with zeros
+  CTF_Tensor A(4, sizeN4, shapeN4, dw);
+  CTF_Tensor B(4, sizeN4, shapeN4, dw);
+  CTF_Tensor C(4, sizeN4, shapeN4, dw);
+
+  srand48(13*rank); // rank-dependent seed for the drand48() calls below
+
+  CTF_Matrix mA(n,n,NS,dw);
+  CTF_Matrix mB(n,n,NS,dw);
+  mA.get_local_data(&np, &indices, &pairs); // outputs: entry count, index array, value array
+  for (i=0; i<np; i++ ) pairs[i] = drand48()-.5; //(1.E-3)*sin(indices[i]);
+  mA.write_remote_data(np, indices, pairs); // write the new values back at the same indices
+  free(pairs); // both arrays from get_local_data are released here with free()
+  free(indices);
+  mB.get_local_data(&np, &indices, &pairs); // same fill procedure for mB
+  for (i=0; i<np; i++ ) pairs[i] = drand48()-.5; //(1.E-3)*sin(indices[i]);
+  mB.write_remote_data(np, indices, pairs);
+  free(pairs);
+  free(indices);
+
+  A["abij"] = mA["ii"]; // A uses the diagonal of mA indexed by i ...
+  B["abij"] = mA["jj"]; // ... while B uses the same diagonal indexed by j
+  A["abij"] -= mB["aa"]; // A subtracts the diagonal of mB indexed by a ...
+  B["abij"] -= mB["bb"]; // ... while B subtracts it indexed by b
+  C["abij"] = A["abij"]-B["abij"]; // difference between the two constructions
+
+  double norm = C.reduce(CTF_OP_SQNRM2);
+  
+  if (norm < 1.E-6){ // tolerance on the reduced difference
+    pass = 1;
+    if (rank == 0) // only rank 0 reports the result
+      printf("{(A[\"(ab)(ij)\"]=mA[\"ii\"]-mB[\"aa\"]=mA[\"jj\"]-mB[\"bb\"]} passed \n");
+  } else {
+    pass = 0;
+    if (rank == 0)
+      printf("{(A[\"(ab)(ij)\"]=mA[\"ii\"]-mB[\"aa\"]=mA[\"jj\"]-mB[\"bb\"]} failed \n");
+  }
+  return pass;
+} 
+
+/**
+ * getCmdOption: finds a command-line option and returns the argument after it.
+ * This helper and main below are compiled only when TEST_SUITE is not defined.
+ *
+ * @param begin  first element of the argument array to search
+ * @param end    one past the last element of the argument array
+ * @param option option string to look for
+ * @return the element following the first match of option, or 0 when option
+ *         is not found or is the last element of the range
+ */
+#ifndef TEST_SUITE
+char* getCmdOption(char ** begin,
+                   char ** end,
+                   const   std::string & option){
+  char ** itr = std::find(begin, end, option); // each char* is compared with option via std::string's operator==
+  if (itr != end && ++itr != end){ // found, and a following element exists
+    return *itr;
+  }
+  return 0;
+}
+
+/**
+ * Standalone driver for diag_sym: initializes MPI, reads an optional
+ * "-n <size>" argument (7 when absent or when the parsed value is negative),
+ * runs diag_sym on a CTF_World and finalizes MPI.
+ *
+ * NOTE(review): the value returned by diag_sym is discarded and main always
+ * returns 0, so a failed check is visible only in the printed output.
+ */
+int main(int argc, char ** argv){
+  int rank, np, n;
+  int const in_num = argc; // copies taken before MPI_Init receives &argc/&argv
+  char ** input_str = argv;
+
+  MPI_Init(&argc, &argv);
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &np);
+
+  if (getCmdOption(input_str, input_str+in_num, "-n")){
+    n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); // NOTE(review): atoi yields 0 for non-numeric text, and 0 is not replaced by the default
+    if (n < 0) n = 7; // negative sizes fall back to the default
+  } else n = 7;
+
+
+  { // inner scope: dw is destroyed at the closing brace, before MPI_Finalize
+    CTF_World dw(argc, argv);
+    diag_sym(n, dw);
+  }
+
+  MPI_Finalize();
+  return 0;
+}
+#endif
--- a/src/dist_tensor/dist_tensor_fold.cxx
+++ b/src/dist_tensor/dist_tensor_fold.cxx
@@ -1245,10 +1245,14 @@ int dist_tensor<dtype>::unfold_broken_sym(CTF_sum_type_t const *  type,
      iA = type->idx_map_A[i];
      if (idx_arr[2*iA+1] != -1){
        if (tsr_B->sym[idx_arr[2*iA+1]] == NS ||
+            idx_arr[2*type->idx_map_A[i+1]+1] == -1 ||
            type->idx_map_A[i+1] != type->idx_map_B[idx_arr[2*iA+1]+1]){
          sidx = 2*i;
          break;
        }
+      } else if (idx_arr[2*type->idx_map_A[i+1]+1] != -1){
+        sidx = 2*i;
+        break;
      }
    }
  } 
@@ -1258,10 +1262,14 @@ int dist_tensor<dtype>::unfold_broken_sym(CTF_sum_type_t const *  type,
        iB = type->idx_map_B[i];
        if (idx_arr[2*iB+0] != -1){
          if (tsr_A->sym[idx_arr[2*iB+0]] == NS ||
+              idx_arr[2*type->idx_map_B[i+1]+0] == -1 ||
              type->idx_map_B[i+1] != type->idx_map_A[idx_arr[2*iB+0]+1]){
            sidx = 2*i+1;
            break;
          }
+        } else if (idx_arr[2*type->idx_map_B[i+1]+0] != -1){
+          sidx = 2*i+1;
+          break;
        }
      }
    }

--- a/src/dist_tensor/dist_tensor_op.cxx
+++ b/src/dist_tensor/dist_tensor_op.cxx
@@ -1897,6 +1897,7 @@ int dist_tensor<dtype>::sym_sum_tsr( dtype const                alpha_,
    if (ntid_A != type->tid_A) del_tsr(ntid_A);
    CTF_free(map_A);
    ntid_A = new_tid;
+    new_type.tid_A = new_tid;
    map_A = new_idx_map;
  }
  nst_B = 0;
@@ -1905,12 +1906,13 @@ int dist_tensor<dtype>::sym_sum_tsr( dtype const                alpha_,
    dstack_tid_B[nst_B] = ntid_B;
    nst_B++;
    ntid_B = new_tid;
+    new_type.tid_B = new_tid;
    map_B = new_idx_map;
  }

  if (ntid_A == ntid_B){
    clone_tensor(ntid_A, 1, &new_tid);
-    CTF_sum_type_t new_type = *type;
+    new_type = *type;
    new_type.tid_A = new_tid;
    stat = sym_sum_tsr(alpha_, beta, &new_type, ftsr, felm, run_diag);
    del_tsr(new_tid);
@@ -1980,8 +1982,8 @@ int dist_tensor<dtype>::sym_sum_tsr( dtype const                alpha_,
    CTF_free(unfold_type.idx_map_A);
    CTF_free(unfold_type.idx_map_B);
  } else {
-    sum_tensors(alpha, beta, type->tid_A, type->tid_B, type->idx_map_A, 
-                type->idx_map_B, ftsr, felm, run_diag);
+    sum_tensors(alpha, beta, new_type.tid_A, new_type.tid_B, new_type.idx_map_A, 
+                new_type.idx_map_B, ftsr, felm, run_diag);
  }
  if (ntid_A != type->tid_A) del_tsr(ntid_A);
  for (i=nst_B-1; i>=0; i--){

--- a/src/make/rules.mk
+++ b/src/make/rules.mk
@@ -2,7 +2,7 @@ all: $(DEFAULT_COMPONENTS)


 EXECUTABLES = pgemm_test nonsq_pgemm_test nonsq_pgemm_bench \
-              examples dft dft_3D gemm gemm_4D trace sym3 \
+              examples dft dft_3D gemm gemm_4D trace diag_sym sym3 \
              ccsdt_t3_to_t2 weight_4D test_suite fast_sym \
              fast_sym_4D strassen


--- a/src/test/test_suite.cxx
+++ b/src/test/test_suite.cxx
@@ -17,6 +17,7 @@
 #include "../../examples/gemm.cxx"
 #include "../../examples/gemm_4D.cxx"
 #include "../../examples/trace.cxx"
+#include "../../examples/diag_sym.cxx"
 #include "../../examples/dft.cxx"
 #include "../../examples/dft_3D.cxx"
 #include "../../examples/fast_sym.cxx"
@@ -121,6 +122,10 @@ int main(int argc, char ** argv){
      printf("Testing a 2D trace operation with n = %d:\n",n*n);
    pass.push_back(trace(n*n, dw));
    
+    if (rank == 0)
+      printf("Testing a diag sym operation with n = %d:\n",n);
+    pass.push_back(diag_sym(n, dw));
+    
    if (rank == 0)
      printf("Testing fast symmetric multiplication operation with n = %d:\n",n*n);
    pass.push_back(fast_sym(n*n, dw));