Some corrections; the scalar test is flawed!

17cd2423 · solomon · 6d30b38c · 17cd2423 · 17cd2423 · 17cd2423
Commit 17cd2423 authored 10 years ago by solomon
7 changed files
--- a/examples/scalar.cxx
+++ b/examples/scalar.cxx
@@ -29,36 +29,48 @@ int scalar(CTF::World    &dw){
  A.read_local(&np,&indices,&pairs);
  pass -=!(np<=1);
 
+  A.print();
  if (np>0){
    pass -=!(indices[0] == 0);
-    pass -=!(pairs[0] == 0.0);
+  assert(pass);
+    pass -=!(std::abs(pairs[0]) < 1.E-9);
+  assert(pass);
    pairs[0] = 4.2;  
  } 
  A.write(np,indices,pairs);
  free(indices);
  free(pairs);
  //A = 4.2;
+  A.print();
  A.read_local(&np,&indices,&pairs);
+  printf("np=%ld\n",np);
  pass -= !(np<=1);
+  assert(pass);
 
  if (np>0){
    pass -=(indices[0] != 0);
-    pass -=(pairs[0] != 4.2);
+  assert(pass);
+    pass -=!(pairs[0]-4.2 < 1.E-9);
+  assert(pass);
  } 
  free(indices);
  free(pairs);
  val = A;
-  pass -=(val != 4.2);
+  pass -=!(val-4.2 < 1.E-9);
+  assert(pass);
  
  CTF::Scalar<> B(4.3, dw);
-  pass -=(4.3 != (double)B);
+  pass -=!((double)B-4.3 < 1.E-9);
+  assert(pass);

  B=A;
-  pass -=(4.2 != (double)B);
+  pass -=!((double)B-4.2 < 1.E-9);
+  assert(pass);

  int n = 7;
  CTF::Matrix<> C(n,n,AS,dw);

+  //FIXME: this is nonsense! should result in zero tensor
  C["ij"]=A[""];
  

@@ -72,12 +84,20 @@ int scalar(CTF::World    &dw){
    printf("C sum is %lf, abs sum is %lf, C[\"ij\"]=%lf expectd %lf\n",
            C.reduce(CTF::OP_SUM), C.reduce(CTF::OP_SUMABS), val, n*n*4.2);
  }*/
-  pass-= !( fabs(C.reduce(CTF::OP_SUMABS)-n*(n-1)*4.2)<1.E-10);
-  
+  printf("%lf, %lf\n",C.reduce(CTF::OP_SUMABS),n*(n-1)*4.2);
+  pass-= !( fabs(C.reduce(CTF::OP_SUMABS)-n*(n-1)*4.2)<1.E-8);
+  assert(pass);
+
+  printf("NOW\n"); 
+ 
  C["ij"]=13.1;

+  printf("NOT NOW\n"); 
+  C.print();

+  printf("%lf, %lf\n",C.reduce(CTF::OP_SUMABS),n*(n-1)*13.1);
  pass-= !( fabs(C.reduce(CTF::OP_SUMABS)-n*(n-1)*13.1)<1.E-10);
+  assert(pass);
  int sizeN4[4] = {n,0,n,n};
  int shapeN4[4] = {NS,NS,SY,NS};
  CTF::Matrix<> E(n,n,NS,dw);
@@ -88,10 +108,12 @@ int scalar(CTF::World    &dw){
  E["ii"]=D["klij"]*E["ki"];
  
  pass-= !( fabs(E.reduce(CTF::OP_SUMABS)-0)>1.E-10);
+  assert(pass);
  
  E["ij"]=D["klij"]*E["ki"];

  pass-= !( fabs(E.reduce(CTF::OP_SUMABS)-0)<1.E-10);
+  assert(pass);
  
  if (rank == 0){
    MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);

--- a/src/interface/idx_tensor.cxx
+++ b/src/interface/idx_tensor.cxx
@@ -262,7 +262,6 @@ namespace CTF {
      summation s(&ts, NULL, output.sr->mulid(), 
                  output.parent, output.idx_map, output.scale);
      return s.estimate_time();
-      ts.set(output.scale);
    } else {
      summation s(this->parent, idx_map, scale,
                  output.parent, output.idx_map, output.scale);

--- a/src/summation/summation.cxx
+++ b/src/summation/summation.cxx
@@ -616,6 +616,60 @@ namespace CTF_int {
   
    CTF_int::contract_mst();

+    // FIXME: if custom function, we currently don't know whether it's odd, even, or neither, so unpack everything
+    if (is_custom){
+      bool is_nonsym=true;
+      for (int i=0; i<A->order; i++){
+        if (A->sym[i] != NS){
+          is_nonsym = false;
+        }
+      }
+      if (!is_nonsym){
+        int sym_A[A->order];
+        std::fill(sym_A, sym_A+A->order, NS);
+        int idx_A[A->order];
+        for (int i=0; i<A->order; i++){
+          idx_A[i] = i;
+        }
+        tensor tA(A->sr, A->order, A->lens, sym_A, A->wrld, 1);
+        tA.is_home = 0;
+        summation st(A, idx_A, A->sr->mulid(), &tA, idx_A, A->sr->mulid());
+        st.execute();
+        summation stme(*this);
+        stme.A = &tA;
+        stme.execute();
+        return SUCCESS;
+      }
+    }
+    if (is_custom){
+      bool is_nonsym=true;
+      for (int i=0; i<B->order; i++){
+        if (B->sym[i] != NS){
+          is_nonsym = false;
+        }
+      }
+      if (!is_nonsym){
+        int sym_B[B->order];
+        std::fill(sym_B, sym_B+B->order, NS);
+        int idx_B[B->order];
+        for (int i=0; i<B->order; i++){
+          idx_B[i] = i;
+        }
+        tensor tB(B->sr, B->order, B->lens, sym_B, B->wrld, 1);
+        tB.is_home = 0;
+        if (!B->sr->isequal(B->sr->addid(), beta)){
+          summation st(B, idx_B, B->sr->mulid(), &tB, idx_B, B->sr->mulid());
+          st.execute();
+        }
+        summation stme(*this);
+        stme.B = &tB;
+        stme.execute();
+        summation stme2(&tB, idx_B, B->sr->mulid(), B, idx_B, B->sr->addid());
+        stme2.execute();
+        return SUCCESS;
+      }
+    }
+
  #ifndef HOME_CONTRACT
    #ifdef USE_SYM_SUM
      ret = sym_sum_tsr(run_diag);
@@ -896,6 +950,7 @@ namespace CTF_int {
        signs.clear();
      }
    } else {
+      new_sum.alpha = alpha;
      new_sum.sum_tensors(run_diag);
 /*      sum_tensors(alpha, beta, new_type.tid_A, new_type.tid_B, new_type.idx_map_A,
                  new_type.idx_map_B, ftsr, felm, run_diag);*/
@@ -1489,7 +1544,6 @@ namespace CTF_int {
    if (global_comm.rank == 0)
      printf("Initial mappings:\n");
    A->print_map(stdout);
-    B->print_map(stdout);
  #endif

    //FIXME: try to avoid unfolding immediately, as its not always necessary

--- a/src/summation/sym_seq_sum.cxx
+++ b/src/summation/sym_seq_sum.cxx
@@ -286,8 +286,8 @@ namespace CTF_int {
      if (sym_pass){
        if (alpha != NULL){
          char tmp[sr_B->el_size];
-          sr_B->abs(A+idx_A*sr_A->el_size, tmp);
-          func.apply_f(A+idx_A*sr_A->el_size, tmp);
+          sr_B->mul(A+sr_A->el_size*idx_A, alpha, tmp);
+          func.apply_f(tmp, tmp);
          sr_B->add(B+idx_B*sr_B->el_size, tmp, B+sr_B->el_size*idx_B);
          CTF_FLOPS_ADD(2);
        } else {

--- a/src/symmetry/symmetrization.cxx
+++ b/src/symmetry/symmetrization.cxx
@@ -606,13 +606,13 @@ namespace CTF_int {
      while (tsr_A->sym[j] != NS){
        j++;
        for (k=0; k<(int)perms.size(); k++){
-          new_type = summation(perms[k]);
+          summation new_type1 = summation(perms[k]);
          sign = signs[k];
          if (tsr_A->sym[j-1] == AS) sign *= -1;
-          tmp                    = new_type.idx_A[i];
-          new_type.idx_A[i]  = new_type.idx_A[j];
-          new_type.idx_A[j]  = tmp;
-          add_sym_perm(perms, signs, new_type, sign);
+          tmp                = new_type1.idx_A[i];
+          new_type1.idx_A[i]  = new_type1.idx_A[j];
+          new_type1.idx_A[j]  = tmp;
+          add_sym_perm(perms, signs, new_type1, sign);
        }
      }
    }
@@ -621,13 +621,13 @@ namespace CTF_int {
      while (tsr_B->sym[j] != NS){
        j++;
        for (k=0; k<(int)perms.size(); k++){
-          new_type = summation(perms[k]);
+          summation new_type2 = summation(perms[k]);
          sign = signs[k];
          if (tsr_B->sym[j-1] == AS) sign *= -1;
-          tmp                    = new_type.idx_B[i];
-          new_type.idx_B[i]  = new_type.idx_B[j];
-          new_type.idx_B[j]  = tmp;
-          add_sym_perm(perms, signs, new_type, sign);
+          tmp                    = new_type2.idx_B[i];
+          new_type2.idx_B[i]  = new_type2.idx_B[j];
+          new_type2.idx_B[j]  = tmp;
+          add_sym_perm(perms, signs, new_type2, sign);
        }
      }
    }

--- a/src/tensor/algstrct.cxx
+++ b/src/tensor/algstrct.cxx
@@ -691,13 +691,14 @@ namespace CTF_int {
      case 32:
        return std::lower_bound((CompPair<32>*)ptr,((CompPair<32>*)ptr)+n, ((CompPair<32>*)op.ptr)[0]) - (CompPair<32>*)ptr;
        break;
-      default:
+      default: {
        int64_t keys[n];
        #pragma omp parallel
        for (int64_t i=0; i<n; i++){
          keys[i] = (*this)[i].k();
        }
        return std::lower_bound(keys, keys+n, op.k())-keys;
+        } break;
    }
  }


--- a/src/tensor/untyped_tensor.cxx
+++ b/src/tensor/untyped_tensor.cxx
@@ -1014,7 +1014,6 @@ namespace CTF_int {
    MPI_Gatherv(pmy_data, my_sz*sr->pair_size(), MPI_CHAR, 
               pall_data, recvcnts, displs, MPI_CHAR, 0, wrld->cdt.cm);
    PairIterator all_data = PairIterator(sr,pall_data);
-    printf("my_Sz = %lld tot_sz=%lld\n",my_sz,tot_sz);
    if (wrld->rank == 0){
 //      all_data.sort(tot_sz);
      for (int64_t i=0; i<tot_sz; i++){
@@ -1035,7 +1034,7 @@ namespace CTF_int {
        }
        fprintf(fp," <");
        sr->print(all_data[i].d());
-        fprintf(fp,"> %lld\n",all_data[i].k());
+        fprintf(fp,"> %ld\n",all_data[i].k());
      }
      cfree(recvcnts);
      cfree(displs);