ctf.hpp 55 KB
Newer Older
1 2
/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/

3 4
#ifndef __tCTF_HPP__
#define __tCTF_HPP__
5

solomon's avatar
solomon committed
6 7
#define CTF_VERSION 100

8 9 10
#include "mpi.h"
#include <stdio.h>
#include <stdint.h>
11
#include <vector>
12
#include <deque>
Ducky's avatar
Ducky committed
13 14
#include <set>
#include <map>
solomon's avatar
solomon committed
15
#include <assert.h>
16 17
#include "../src/dist_tensor/cyclopstf.hpp"

Edgar Solomonik's avatar
Edgar Solomonik committed
18 19 20 21 22 23 24 25 26 27 28 29 30 31
/**
 * labels corresponding to symmetry of each tensor dimension
 * NS = 0 - nonsymmetric
 * SY = 1 - symmetric
 * AS = 2 - antisymmetric
 * SH = 3 - symmetric hollow
 */
#if (!defined NS && !defined SY && !defined SH)
#define NS 0
#define SY 1
#define AS 2
#define SH 3
#endif

32
typedef long_int int64_t;
solomon's avatar
solomon committed
33

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
template<typename dtype> class tCTF_fscl;
template<typename dtype> class tCTF_fsum;
template<typename dtype> class tCTF_fctr;
template<typename dtype> class tCTF_Idx_Tensor;
template<typename dtype> class tCTF_Sparse_Tensor;
template<typename dtype> class tCTF_Term;
template<typename dtype> class tCTF_Sum_Term;
template<typename dtype> class tCTF_Contract_Term;

/**
 * \defgroup CTF CTF: main C++ interface
 * @{
 */


Edgar Solomonik's avatar
Edgar Solomonik committed
49 50 51 52 53 54
/**
 * \brief reduction types for tensor data (enum actually defined in ../src/dist_tensor/cyclopstf.hpp)
 */
//enum CTF_OP { CTF_OP_SUM, CTF_OP_SUMABS, CTF_OP_SQNRM2,
//              CTF_OP_MAX, CTF_OP_MIN, CTF_OP_MAXABS, CTF_OP_MINABS };

55
/**
56
 * \brief an instance of the tCTF library (world) on a MPI communicator
57
 */
58 59
template<typename dtype>
class tCTF_World {
60 61
  public:
    MPI_Comm comm;
62
    tCTF<dtype> * ctf;
63 64 65

  public:
    /**
66 67 68 69 70
     * \brief creates tCTF library on comm_ that can output profile data 
     *        into a file with a name based on the main args
     * \param[in] comm_ MPI communicator associated with this CTF instance
     * \param[in] argc number of main arguments 
     * \param[in] argv main arguments 
71
     */
72
    tCTF_World(int argc, char * const * argv);
73

74
    /**
75 76 77 78 79
     * \brief creates tCTF library on comm_ that can output profile data 
     *        into a file with a name based on the main args
     * \param[in] comm_ MPI communicator associated with this CTF instance
     * \param[in] argc number of main arguments 
     * \param[in] argv main arguments 
80
     */
81
    tCTF_World(MPI_Comm       comm_ = MPI_COMM_WORLD,
82
               int            argc = 0,
83
               char * const * argv = NULL);
84 85

    /**
86
     * \brief creates tCTF library on comm_
Edgar Solomonik's avatar
Edgar Solomonik committed
87 88
     * \param[in] ndim number of torus network dimensions
     * \param[in] lens lengths of torus network dimensions
89
     * \param[in] comm MPI global context for this CTF World
90 91
     * \param[in] argc number of main arguments 
     * \param[in] argv main arguments 
92
     */
93
    tCTF_World(int            ndim, 
94 95
               int const *    lens, 
               MPI_Comm       comm_ = MPI_COMM_WORLD,
96
               int            argc = 0,
97
               char * const * argv = NULL);
98 99

    /**
100
     * \brief frees tCTF library
101
     */
102
    ~tCTF_World();
103 104
};

105

solomon's avatar
solomon committed
106 107 108 109
/**
 * \brief semirings defined the elementwise operations computed 
 *         in each tensor contraction
 */
110 111 112 113 114 115 116
class CTF_Semiring {
  public: 
    int el_size;
    char * addid;
    char * mulid;
    MPI_Op addmop;

solomon's avatar
solomon committed
117
    // c = a+b
118 119 120 121
    void (*add)(char const * a, 
                char const * b,
                char *       c);
    
solomon's avatar
solomon committed
122
    // c = a*b
123 124 125 126
    void (*mul)(char const * a, 
                char const * b,
                char *       c);

solomon's avatar
solomon committed
127 128 129 130 131 132 133 134 135 136 137 138
    // beta*C["ij"]=alpha*A^tA["ik"]*B^tB["kj"];
    void (*gemm)(char         tA,
                 char         tB,
                 int          m,
                 int          n,
                 int          k,
                 char const * alpha,
                 char const * A,
                 char const * B,
                 char const * beta,
                 char *       C);

139
  public:
solomon's avatar
solomon committed
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
    /**
     * \brief copy constructor
     * \param[in] other another semiring to copy from
     */
    CTF_Semiring(CTF_Semiring const &other);

    /**
     * \brief constructor creates semiring with all parameters
     * \param[in] el_size number of bytes in each element in the semiring set
     * \param[in] addid additive identity element 
     *              (e.g. 0.0 for the (+,*) semiring over doubles)
     * \param[in] mulid multiplicative identity element 
     *              (e.g. 1.0 for the (+,*) semiring over doubles)
     * \param[in] addmop addition operation to pass to MPI reductions
     * \param[in] add function pointer to add c=a+b on semiring
     * \param[in] mul function pointer to multiply c=a*b on semiring
     * \param[in] gemm function pointer to multiply blocks C, A, and B on semiring
     */
    CTF_Semiring(int          el_size, 
                 char const * addid,
                 char const * mulid,
                 MPI_Op       addmop,
                 void (*add )(char const * a,
                              char const * b,
                              char       * c),
                 void (*mul )(char const * a,
                              char const * b,
                              char       * c),
                 void (*gemm)(char         tA,
                              char         tB,
                              int          m,
                              int          n,
                              int          k,
                              char const * alpha,
                              char const * A,
                              char const * B,
                              char const * beta,
                              char *       C)=NULL);
    /**
     * \brief destructor frees addid and mulid
     */
    ~CTF_Semiring();
182 183 184 185 186 187 188 189 190 191
};

template <typename dtype, dtype (*func)(dtype const a, dtype const b)>
void detypedfunc(char const * a,
                 char const * b,
                 char *       c){
  dtype ans = (*func)(((dtype const *)a)[0], ((dtype const *)b)[0]);
  ((dtype *)c)[0] = ans;
}

solomon's avatar
solomon committed
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
template <typename dtype, 
          dtype (*gemm)(char,char,int,int,int,dtype,
                        dtype const*,dtype const*,dtype,dtype*)>
void detypedgemm(char         tA,
                 char         tB,
                 int          m,
                 int          n,
                 int          k,
                 char const * alpha,
                 char const * A,
                 char const * B,
                 char const * beta,
                 char *       C){
  (*gemm)(tA,tB,m,n,k,
          ((dtype const *)alpha)[0],
           (dtype const *)A,
           (dtype const *)B,
          ((dtype const *)beta)[0],
           (dtype       *)C);
}

// it seems to not be possible to initialize template argument function pointers
// to NULL, so defining this dummy_gemm function instead
template<typename dtype>
dtype dummy_gemm(char,char,int,int,int,dtype,dtype const*,dtype const*,dtype,dtype*){
  assert(0);
}


221 222
template <typename dtype, 
          dtype (*fadd)(dtype a, dtype b),
solomon's avatar
solomon committed
223 224
          dtype (*fmul)(dtype a, dtype b),
          dtype (*gemm)(char,char,int,int,int,dtype,dtype const*,dtype const*,dtype,dtype*)=&dummy_gemm<dtype> >
225 226 227 228 229 230
class tCTF_Semiring {
  public:
    dtype addid;
    dtype mulid;
    MPI_Op addmop;
  public:
solomon's avatar
solomon committed
231 232 233
    /**
     * \brief constructor
     */
234 235 236 237 238 239 240 241 242
    tCTF_Semiring(dtype  addid_,
                  dtype  mulid_,
                  MPI_Op addmop_){
      addid = addid_;
      mulid = mulid_;
      addmop = addmop_;
    }

    operator CTF_Semiring() {
solomon's avatar
solomon committed
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
      if (gemm == &dummy_gemm<dtype>){
        CTF_Semiring r(sizeof(dtype), 
                       (char const *)&addid, 
                       (char const *)&mulid, 
                       addmop, 
                       &detypedfunc<dtype,fadd>,
                       &detypedfunc<dtype,fmul>);
        return r;
      } else {
        CTF_Semiring r(sizeof(dtype), 
                       (char const *)&addid, 
                       (char const *)&mulid, 
                       addmop, 
                       &detypedfunc<dtype,fadd>,
                       &detypedfunc<dtype,fmul>,
                       &detypedgemm<dtype,gemm>);
        return r;
      }
261 262 263 264 265 266 267 268 269 270
    }
};

class CTF_SemiringElement {
  public:
    CTF_Semiring r;
    char * val;
  
};

271

272
/**
273
 * \brief an instance of a tensor within a tCTF world
274
 */
275 276
template<typename dtype>
class tCTF_Tensor {
277
  public:
278 279
    int tid, ndim;
    int * sym, * len;
280
    char * idx_map;
281
    char const * name;
282
    tCTF_World<dtype> * world;
283 284

  public:
285
    /**
solomon's avatar
solomon committed
286
     * \breif default constructor
287
     */
solomon's avatar
solomon committed
288
    tCTF_Tensor();
289

290 291
    /**
     * \brief copies a tensor (setting data to zero or copying A)
Edgar Solomonik's avatar
Edgar Solomonik committed
292 293
     * \param[in] A tensor to copy
     * \param[in] copy whether to copy the data of A into the new tensor
294
     */
Edgar Solomonik's avatar
Edgar Solomonik committed
295
    tCTF_Tensor(tCTF_Tensor const &   A,
296
                bool                  copy = true);
297 298 299

    /**
     * \brief copies a tensor filled with zeros
Edgar Solomonik's avatar
Edgar Solomonik committed
300 301 302 303
     * \param[in] ndim number of dimensions of tensor
     * \param[in] len edge lengths of tensor
     * \param[in] sym symmetries of tensor (e.g. symmetric matrix -> sym={SY, NS})
     * \param[in] world_ a world for the tensor to live in
304 305
     * \param[in] name an optionary name for the tensor
     * \param[in] profile set to 1 to profile contractions involving this tensor
306
     */
307
    tCTF_Tensor(int                  ndim_,
Edgar Solomonik's avatar
Edgar Solomonik committed
308 309
                int const *          len_,
                int const *          sym_,
310
                tCTF_World<dtype> &  world_,
311 312 313 314 315 316 317 318
#if DEBUG < 3
                char const *  name_ = NULL,
                int           profile_ = 0
#else
                char const *  name_ = "X",
                int           profile_ = 1
#endif
                 );
319 320 321 322 323 324 325 326
    
    /**
     * \brief creates a zeroed out copy (data not copied) of a tensor in a different world
     * \param[in] A tensor whose characteristics to copy
     * \param[in] world_ a world for the tensor we are creating to live in, can be different from A
     */
    tCTF_Tensor(tCTF_Tensor const & A,
                tCTF_World<dtype> & world_);
solomon's avatar
solomon committed
327

328 329
    /**
     * \brief gives the values associated with any set of indices
330 331 332
     * The sparse data is defined in coordinate format. The tensor index (i,j,k,l) of a tensor with edge lengths
     * {m,n,p,q} is associated with the global index g via the formula g=i+j*m+k*m*n+l*m*n*p. The row index is first
     * and the column index is second for matrices, which means they are column major. 
333 334 335 336
     * \param[in] npair number of values to fetch
     * \param[in] global_idx index within global tensor of each value to fetch
     * \param[in,out] data a prealloced pointer to the data with the specified indices
     */
337 338 339
    void read(long_int          npair, 
              long_int const *  global_idx, 
              dtype *           data) const;
340
    
341 342 343 344 345
    /**
     * \brief gives the values associated with any set of indices
     * \param[in] npair number of values to fetch
     * \param[in,out] pairs a prealloced pointer to key-value pairs
     */
346 347 348 349
    void read(long_int          npair,
              tkv_pair<dtype> * pairs) const;
    
    /**
350
     * \brief sparse read: A[global_idx[i]] = alpha*A[global_idx[i]]+beta*data[i]
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
     * \param[in] npair number of values to read into tensor
     * \param[in] alpha scaling factor on read data
     * \param[in] beta scaling factor on value in initial values vector
     * \param[in] global_idx global index within tensor of value to add
     * \param[in] data values to add to the tensor
     */
    void read(long_int         npair, 
              dtype            alpha, 
              dtype            beta,
              long_int const * global_idx,
              dtype *          data) const;

    /**
     * \brief sparse read: pairs[i].d = alpha*A[pairs[i].k]+beta*pairs[i].d
     * \param[in] npair number of values to read into tensor
     * \param[in] alpha scaling factor on read data
     * \param[in] beta scaling factor on value in initial pairs vector
     * \param[in] pairs key-value pairs to add to the tensor
     */
    void read(long_int          npair,
              dtype             alpha,
              dtype             beta,
              tkv_pair<dtype> * pairs) const;
   
375

376 377
    /**
     * \brief writes in values associated with any set of indices
378 379 380
     * The sparse data is defined in coordinate format. The tensor index (i,j,k,l) of a tensor with edge lengths
     * {m,n,p,q} is associated with the global index g via the formula g=i+j*m+k*m*n+l*m*n*p. The row index is first
     * and the column index is second for matrices, which means they are column major. 
381 382 383 384
     * \param[in] npair number of values to write into tensor
     * \param[in] global_idx global index within tensor of value to write
     * \param[in] data values to  write to the indices
     */
385 386 387
    void write(long_int         npair, 
               long_int const * global_idx, 
               dtype const    * data);
388 389 390 391 392 393

    /**
     * \brief writes in values associated with any set of indices
     * \param[in] npair number of values to write into tensor
     * \param[in] pairs key-value pairs to write to the tensor
     */
394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
    void write(long_int                 npair,
               tkv_pair<dtype> const *  pairs);
    
    /**
     * \brief sparse add: A[global_idx[i]] = beta*A[global_idx[i]]+alpha*data[i]
     * \param[in] npair number of values to write into tensor
     * \param[in] alpha scaling factor on value to add
     * \param[in] beta scaling factor on original data
     * \param[in] global_idx global index within tensor of value to add
     * \param[in] data values to add to the tensor
     */
    void write(long_int         npair, 
               dtype            alpha, 
               dtype            beta,
               long_int const * global_idx,
               dtype const *    data);

    /**
     * \brief sparse add: A[pairs[i].k] = alpha*A[pairs[i].k]+beta*pairs[i].d
     * \param[in] npair number of values to write into tensor
     * \param[in] alpha scaling factor on value to add
     * \param[in] beta scaling factor on original data
     * \param[in] pairs key-value pairs to add to the tensor
     */
    void write(long_int                npair,
               dtype                   alpha,
               dtype                   beta,
               tkv_pair<dtype> const * pairs);
422 423 424
   
    /**
     * \brief contracts C[idx_C] = beta*C[idx_C] + alpha*A[idx_A]*B[idx_B]
425
     *        if fseq defined computes fseq(alpha,A[idx_A],B[idx_B],beta*C[idx_C])
Edgar Solomonik's avatar
Edgar Solomonik committed
426 427 428 429 430 431 432
     * \param[in] alpha A*B scaling factor
     * \param[in] A first operand tensor
     * \param[in] idx_A indices of A in contraction, e.g. "ik" -> A_{ik}
     * \param[in] B second operand tensor
     * \param[in] idx_B indices of B in contraction, e.g. "kj" -> B_{kj}
     * \param[in] beta C scaling factor
     * \param[in] idx_C indices of C (this tensor),  e.g. "ij" -> C_{ij}
433 434
     * \param[in] fseq sequential operation to execute, default is multiply-add
     */
435
    void contract(dtype                    alpha, 
436 437 438 439
                  const tCTF_Tensor&       A, 
                  char const *             idx_A,
                  const tCTF_Tensor&       B, 
                  char const *             idx_B,
440
                  dtype                    beta,
441 442
                  char const *             idx_C,
                  tCTF_fctr<dtype>         fseq = tCTF_fctr<dtype>());
Edgar Solomonik's avatar
Edgar Solomonik committed
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470

    /**
     * \brief estimate the cost of a contraction C[idx_C] = A[idx_A]*B[idx_B]
     * \param[in] A first operand tensor
     * \param[in] idx_A indices of A in contraction, e.g. "ik" -> A_{ik}
     * \param[in] B second operand tensor
     * \param[in] idx_B indices of B in contraction, e.g. "kj" -> B_{kj}
     * \param[in] idx_C indices of C (this tensor),  e.g. "ij" -> C_{ij}
     * \return cost as a int64_t type, currently a rought estimate of flops/processor
     */
    int64_t estimate_cost(const tCTF_Tensor & A,
                          char const *        idx_A,
                          const tCTF_Tensor & B,
                          char const *        idx_B,
                          char const *        idx_C);
    
    /**
     * \brief estimate the cost of a sum B[idx_B] = A[idx_A]
     * \param[in] A first operand tensor
     * \param[in] idx_A indices of A in contraction, e.g. "ik" -> A_{ik}
     * \param[in] idx_B indices of B in contraction, e.g. "kj" -> B_{kj}
     * \return cost as a int64_t type, currently a rought estimate of flops/processor
     */
    int64_t estimate_cost(const tCTF_Tensor & A,
                          char const *        idx_A,
                          char const *        idx_B);


471
    
472 473
    /**
     * \brief sums B[idx_B] = beta*B[idx_B] + alpha*A[idx_A]
474
     *        if fseq defined computes fseq(alpha,A[idx_A],beta*B[idx_B])
Edgar Solomonik's avatar
Edgar Solomonik committed
475 476 477 478 479
     * \param[in] alpha A scaling factor
     * \param[in] A first operand tensor
     * \param[in] idx_A indices of A in sum, e.g. "ij" -> A_{ij}
     * \param[in] beta B scaling factor
     * \param[in] idx_B indices of B (this tensor), e.g. "ij" -> B_{ij}
480 481
     * \param[in] fseq sequential operation to execute, default is multiply-add
     */
482
    void sum(dtype                   alpha, 
483 484
             const tCTF_Tensor&      A, 
             char const *            idx_A,
485
             dtype                   beta,
486 487
             char const *            idx_B,
             tCTF_fsum<dtype>        fseq = tCTF_fsum<dtype>());
488 489 490
    
    /**
     * \brief scales A[idx_A] = alpha*A[idx_A]
491 492 493 494
     *        if fseq defined computes fseq(alpha,A[idx_A])
     * \param[in] alpha A scaling factor
     * \param[in] idx_A indices of A (this tensor), e.g. "ij" -> A_{ij}
     * \param[in] fseq sequential operation to execute, default is multiply-add
495
     */
496
    void scale(dtype                   alpha, 
497 498
               char const *            idx_A,
               tCTF_fscl<dtype>        fseq = tCTF_fscl<dtype>());
499

500 501 502 503 504 505 506
    /**
     * \brief cuts out a slice (block) of this tensor A[offsets,ends)
     * \param[in] offsets bottom left corner of block
     * \param[in] ends top right corner of block
     * \return new tensor corresponding to requested slice
     */
    tCTF_Tensor slice(int const * offsets,
solomon's avatar
solomon committed
507
                      int const * ends) const;
508
    
509
    tCTF_Tensor slice(long_int corner_off,
solomon's avatar
solomon committed
510
                      long_int corner_end) const;
511
    
512 513 514 515 516 517 518 519 520
    /**
     * \brief cuts out a slice (block) of this tensor A[offsets,ends)
     * \param[in] offsets bottom left corner of block
     * \param[in] ends top right corner of block
     * \return new tensor corresponding to requested slice which lives on
     *          oworld
     */
    tCTF_Tensor slice(int const *         offsets,
                      int const *         ends,
solomon's avatar
solomon committed
521
                      tCTF_World<dtype> * oworld) const;
522 523 524

    tCTF_Tensor slice(long_int            corner_off,
                      long_int            corner_end,
solomon's avatar
solomon committed
525
                      tCTF_World<dtype> * oworld) const;
526 527
    
    
528 529 530 531 532 533 534 535 536 537
    /**
     * \brief cuts out a slice (block) of this tensor = B
     *   B[offsets,ends)=beta*B[offsets,ends) + alpha*A[offsets_A,ends_A)
     * \param[in] offsets bottom left corner of block
     * \param[in] ends top right corner of block
     * \param[in] alpha scaling factor of this tensor
     * \param[in] offsets bottom left corner of block of A
     * \param[in] ends top right corner of block of A
     * \param[in] alpha scaling factor of tensor A
     */
538 539 540
    void slice(int const *    offsets,
               int const *    ends,
               dtype          beta,
solomon's avatar
solomon committed
541
               tCTF_Tensor const & A,
542 543
               int const *    offsets_A,
               int const *    ends_A,
solomon's avatar
solomon committed
544
               dtype          alpha) const;
545
    
546 547 548
    void slice(long_int       corner_off,
               long_int       corner_end,
               dtype          beta,
solomon's avatar
solomon committed
549
               tCTF_Tensor const & A,
550 551
               long_int       corner_off_A,
               long_int       corner_end_A,
solomon's avatar
solomon committed
552
               dtype          alpha) const;
553 554

    /**
solomon's avatar
solomon committed
555
     * \brief Apply permutation to matrix, potentially extracting a slice
556
     *              B[i,j,...] 
557 558
     *                = beta*B[...] + alpha*A[perms_A[0][i],perms_A[1][j],...]
     *
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
     * \param[in] beta scaling factor for values of tensor B (this)
     * \param[in] A specification of operand tensor A must live on 
                    the same CTF_World or a subset of the CTF_World on which B lives
     * \param[in] perms_A specifies permutations for tensor A, e.g. A[perms_A[0][i],perms_A[1][j]]
     *                    if a subarray NULL, no permutation applied to this index,
     *                    if an entry is -1, the corresponding entries of the tensor are skipped 
                            (A must then be smaller than B)
     * \param[in] alpha scaling factor for A tensor
     */
    void permute(dtype          beta,
                 tCTF_Tensor &  A,
                 int * const *  perms_A,
                 dtype          alpha);

    /**
solomon's avatar
solomon committed
574
     * \brief Apply permutation to matrix, potentially extracting a slice
575 576 577
     *              B[perms_B[0][i],perms_B[0][j],...] 
     *                = beta*B[...] + alpha*A[i,j,...]
     *
578
     * \param[in] perms_B specifies permutations for tensor B, e.g. B[perms_B[0][i],perms_B[1][j]]
579 580 581
     *                    if a subarray NULL, no permutation applied to this index,
     *                    if an entry is -1, the corresponding entries of the tensor are skipped 
     *                       (A must then be smaller than B)
582
     * \param[in] beta scaling factor for values of tensor B (this)
583 584
     * \param[in] A specification of operand tensor A must live on 
                    the same CTF_World or a superset of the CTF_World on which B lives
585 586
     * \param[in] alpha scaling factor for A tensor
     */
587
    void permute(int * const *  perms_B,
588 589
                 dtype          beta,
                 tCTF_Tensor &  A,
590
                 dtype          alpha);
591 592 593 594 595
    
   /**
     * \brief accumulates this tensor to a tensor object defined on a different world
     * \param[in] tsr a tensor object of the same characteristic as this tensor, 
     *             but on a different CTF_world/MPI_comm
596 597
     * \param[in] alpha scaling factor for this tensor (default 1.0)
     * \param[in] beta scaling factor for tensor tsr (default 1.0)
598 599
     */
    void add_to_subworld(tCTF_Tensor<dtype> * tsr,
600 601 602
                         dtype alpha,
                         dtype beta) const;
    void add_to_subworld(tCTF_Tensor<dtype> * tsr) const;
603 604 605 606 607
    
   /**
     * \brief accumulates this tensor from a tensor object defined on a different world
     * \param[in] tsr a tensor object of the same characteristic as this tensor, 
     *             but on a different CTF_world/MPI_comm
608 609
     * \param[in] alpha scaling factor for tensor tsr (default 1.0)
     * \param[in] beta scaling factor for this tensor (default 1.0)
610 611
     */
    void add_from_subworld(tCTF_Tensor<dtype> * tsr,
612 613 614
                           dtype alpha,
                           dtype beta) const;
    void add_from_subworld(tCTF_Tensor<dtype> * tsr) const;
615
    
616

Edgar Solomonik's avatar
Edgar Solomonik committed
617 618 619 620
    /**
     * \brief aligns data mapping with tensor A
     * \param[in] A align with this tensor
     */
621
    void align(tCTF_Tensor const & A);
Edgar Solomonik's avatar
Edgar Solomonik committed
622

623 624
    /**
     * \brief performs a reduction on the tensor
625
     * \param[in] op reduction operation (see top of this cyclopstf.hpp for choices)
626
     */    
627
    dtype reduce(CTF_OP op);
628 629
    
    /**
630
     * \brief computes the entrywise 1-norm of the tensor
631 632 633 634
     */    
    dtype norm1(){ return reduce(CTF_OP_NORM1); };

    /**
635
     * \brief computes the frobenius norm of the tensor
636 637 638 639
     */    
    dtype norm2(){ return reduce(CTF_OP_NORM2); };

    /**
640
     * \brief finds the max absolute value element of the tensor
641 642
     */    
    dtype norm_infty(){ return reduce(CTF_OP_MAXABS); };
643 644 645 646 647 648

    /**
     * \brief gives the raw current local data with padding included
     * \param[out] size of local data chunk
     * \return pointer to local data
     */
solomon's avatar
solomon committed
649
    dtype * get_raw_data(long_int * size);
650 651 652 653 654 655

    /**
     * \brief gives a read-only copy of the raw current local data with padding included
     * \param[out] size of local data chunk
     * \return pointer to read-only copy of local data
     */
solomon's avatar
solomon committed
656
    const dtype * raw_data(long_int * size) const;
657 658 659 660 661 662 663

    /**
     * \brief gives the global indices and values associated with the local data
     * \param[out] npair number of local values
     * \param[out] global_idx index within global tensor of each data value
     * \param[out] data pointer to local values in the order of the indices
     */
664 665 666
    void read_local(long_int *   npair, 
                    long_int **  global_idx, 
                    dtype **     data) const;
667

668 669 670 671 672
    /**
     * \brief gives the global indices and values associated with the local data
     * \param[out] npair number of local values
     * \param[out] pairs pointer to local key-value pairs
     */
673 674
    void read_local(long_int *         npair,
                    tkv_pair<dtype> ** pairs) const;
675

676 677 678 679 680
    /**
     * \brief collects the entire tensor data on each process (not memory scalable)
     * \param[out] npair number of values in the tensor
     * \param[out] data pointer to the data of the entire tensor
     */
681 682
    void read_all(long_int * npair, 
                  dtype **   data) const;
683 684 685 686 687
    
    /**
     * \brief collects the entire tensor data on each process (not memory scalable)
     * \param[in,out] preallocated data pointer to the data of the entire tensor
     */
688
    long_int read_all(dtype * data) const;
689

690 691 692 693 694 695 696 697
    /**
     * \brief obtains a small number of the biggest elements of the 
     *        tensor in sorted order (e.g. eigenvalues)
     * \param[in] n number of elements to collect
     * \param[in] data output data (should be preallocated to size at least n)
     *
     * WARNING: currently functional only for dtype=double
     */
698
    void get_max_abs(int        n,
699 700
                     dtype *    data);

701 702 703
    /**
     * \brief turns on profiling for tensor
     */
704 705
    void profile_on();
    
706 707 708
    /**
     * \brief turns off profiling for tensor
     */
709 710 711 712 713 714 715
    void profile_off();

    /**
     * \brief sets tensor name
     * \param[in] name new for tensor
     */
    void set_name(char const * name);
716 717 718 719

    /**
     * \brief sets all values in the tensor to val
     */
720
    tCTF_Tensor& operator=(dtype val);
721
    
722 723 724 725 726
    /**
     * \brief sets the tensor
     */
    void operator=(tCTF_Tensor<dtype> A);
    
727 728
    /**
     * \brief associated an index map with the tensor for future operation
729
     * \param[in] idx_map_ index assignment for this tensor
730
     */
731
    tCTF_Idx_Tensor<dtype> operator[](char const * idx_map_);
732
    
733 734 735 736
    /**
     * \brief gives handle to sparse index subset of tensors
     * \param[in] indices, vector of indices to sparse tensor
     */
737
    tCTF_Sparse_Tensor<dtype> operator[](std::vector<long_int> indices);
738
    
739 740
    /**
     * \brief prints tensor data to file using process 0
741
     * \param[in] fp file to print to e.g. stdout
742 743 744 745 746 747 748 749 750
     * \param[in] cutoff do not print values of absolute value smaller than this
     */
    void print(FILE * fp = stdout, double cutoff = -1.0) const;

    /**
     * \brief prints two sets of tensor data side-by-side to file using process 0
     * \param[in] fp file to print to e.g. stdout
     * \param[in] A tensor to compare against
     * \param[in] cutoff do not print values of absolute value smaller than this
751
     */
752
    void compare(const tCTF_Tensor<dtype>& A, FILE * fp = stdout, double cutoff = -1.0) const;
753 754

    /**
755
     * \brief frees tCTF tensor
756
     */
757
    ~tCTF_Tensor();
758
};
759

Ducky's avatar
Ducky committed
760 761 762 763 764 765 766
/**
 * \brief comparison function for sets of tensor pointers
 * This ensures the set iteration order is consistent across nodes
 */
template<typename dtype>
struct tensor_tid_less {
  bool operator()(tCTF_Tensor<dtype>* A, tCTF_Tensor<dtype>* B) {
Ducky's avatar
Ducky committed
767 768 769 770 771
    if (A == NULL && B != NULL) {
      return true;
    } else if (A == NULL || B == NULL) {
      return false;
    }
Ducky's avatar
Ducky committed
772 773 774 775
    return A->tid < B->tid;
  }
};

776 777 778 779 780 781 782 783 784 785 786 787 788 789
/**
 * \brief Matrix class which encapsulates a 2D tensor 
 */
template<typename dtype> 
class tCTF_Matrix : public tCTF_Tensor<dtype> {
  public:
    int nrow, ncol, sym;

    /**
     * \brief constructor for a matrix
     * \param[in] nrow number of matrix rows
     * \param[in] ncol number of matrix columns
     * \param[in] sym symmetry of matrix
     * \param[in] world CTF world where the tensor will live
790 791
     * \param[in] name_ an optionary name for the tensor
     * \param[in] profile_ set to 1 to profile contractions involving this tensor
792
     */ 
793 794 795
    tCTF_Matrix(int                 nrow_, 
                int                 ncol_, 
                int                 sym_,
796 797
                tCTF_World<dtype> & world,
                char const *        name_ = NULL,
798
                int                 profile_ = 0);
799 800 801 802 803 804 805 806 807 808 809 810 811 812 813

};

/**
 * \brief Vector class which encapsulates a 1D tensor 
 */
template<typename dtype> 
class tCTF_Vector : public tCTF_Tensor<dtype> {
  public:
    int len;

    /**
     * \brief constructor for a vector
     * \param[in] len_ dimension of vector
     * \param[in] world CTF world where the tensor will live
814 815
     * \param[in] name_ an optionary name for the tensor
     * \param[in] profile_ set to 1 to profile contractions involving this tensor
816
     */ 
817
    tCTF_Vector(int                 len_,
818 819
                tCTF_World<dtype> & world,
                char const *        name_ = NULL,
820
                int                 profile_ = 0);
Edgar Solomonik's avatar
Edgar Solomonik committed
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840
};

/**
 * \brief Scalar class which encapsulates a 0D tensor 
 */
template<typename dtype> 
class tCTF_Scalar : public tCTF_Tensor<dtype> {
  public:

    /**
     * \brief constructor for a scalar
     * \param[in] world CTF world where the tensor will live
     */ 
    tCTF_Scalar(tCTF_World<dtype> & world);
    
    /**
     * \brief constructor for a scalar with predefined value
     * \param[in] val scalar value
     * \param[in] world CTF world where the tensor will live
     */ 
841
    tCTF_Scalar(dtype               val,
Edgar Solomonik's avatar
Edgar Solomonik committed
842 843 844 845 846 847 848 849 850 851
                tCTF_World<dtype> & world);

    /**
     * \brief returns scalar value
     */
    dtype get_val();
    
    /**
     * \brief sets scalar value
     */
852
    void set_val(dtype val);
Edgar Solomonik's avatar
Edgar Solomonik committed
853 854 855 856 857

    /**
     * \brief casts into a dtype value
     */
    operator dtype() { return get_val(); }
858 859
};

860 861 862 863
/**
 * \brief a tensor with an index map associated with it (necessary for overloaded operators)
 */
template<typename dtype>
864
class tCTF_Idx_Tensor : public tCTF_Term<dtype> {
865 866 867
  public:
    tCTF_Tensor<dtype> * parent;
    char * idx_map;
868
    int is_intm;
869 870

  public:
871 872 873

  
    // dervied clone calls copy constructor
Ducky's avatar
Ducky committed
874
    tCTF_Term<dtype> * clone(std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL) const;
875

876 877 878 879
    /**
     * \brief constructor takes in a parent tensor and its indices 
     * \param[in] parent_ the parent tensor
     * \param[in] idx_map_ the indices assigned ot this tensor
880
     * \param[in] copy if set to 1, create copy of parent
881
     */
882 883 884 885 886 887 888
    tCTF_Idx_Tensor(tCTF_Tensor<dtype>* parent_, 
                    const char *        idx_map_,
                    int                 copy = 0);
    
    /**
     * \brief copy constructor
     * \param[in] B tensor to copy
889
     * \param[in] copy if 1 then copy the parent tensor of B into a new tensor
890
     */
891
    tCTF_Idx_Tensor(tCTF_Idx_Tensor<dtype> const & B,
Ducky's avatar
Ducky committed
892 893
                    int copy = 0,
                    std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL);
894

895
    tCTF_Idx_Tensor();
896
    
897 898
    tCTF_Idx_Tensor(dtype val);
    
899 900
    ~tCTF_Idx_Tensor();
    
901 902 903 904 905
    /**
     * \brief evalues the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param[in,out] output tensor to write results into and its indices
     */
906
    tCTF_Idx_Tensor<dtype> execute() const;
907 908 909 910 911
    
    /**
     * \brief evalues the expression, which just scales by default
     * \param[in,out] output tensor to write results into and its indices
     */
912
    void execute(tCTF_Idx_Tensor<dtype> output) const;
913
    
solomon's avatar
solomon committed
914 915 916 917 918 919 920 921 922 923 924 925 926
    /**
     * \brief estimates the cost of a contraction
     * \param[in] output tensor to write results into and its indices
     */
    long_int estimate_cost(tCTF_Idx_Tensor<dtype> output) const;
    
    /**
     * \brief estimates the cost the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param[in,out] output tensor to write results into and its indices
     */
    tCTF_Idx_Tensor<dtype> estimate_cost(long_int & cost) const;
    
927
    /**
Ducky's avatar
Ducky committed
928 929
    * \brief appends the tensors this depends on to the input set
    */
Ducky's avatar
Ducky committed
930
    void get_inputs(std::set<tCTF_Tensor<dtype>*, tensor_tid_less<dtype> >* inputs_set) const;
931

932 933 934 935
    /**
     * \brief A = B, compute any operations on operand B and set
     * \param[in] B tensor on the right hand side
     */
936 937
    void operator=(tCTF_Term<dtype> const & B);
    void operator=(tCTF_Idx_Tensor<dtype> const & B);
938 939 940 941 942

    /**
     * \brief A += B, compute any operations on operand B and add
     * \param[in] B tensor on the right hand side
     */
943
    void operator+=(tCTF_Term<dtype> const & B);
944
    
Edgar Solomonik's avatar
Edgar Solomonik committed
945 946 947 948
    /**
     * \brief A += B, compute any operations on operand B and add
     * \param[in] B tensor on the right hand side
     */
949
    void operator-=(tCTF_Term<dtype> const & B);
Edgar Solomonik's avatar
Edgar Solomonik committed
950
    
951 952 953 954
    /**
     * \brief A -> A*B contract two tensors
     * \param[in] B tensor on the right hand side
     */
955
    void operator*=(tCTF_Term<dtype> const & B);
956 957

    /**
958 959
     * \brief TODO A -> A * B^-1
     * \param[in] B
960
     */
961 962
    //void operator/(tCTF_IdxTensor& tsr);
    
963
    /**
964 965 966 967
     * \brief execute ips into output with scale beta
     */    
    //void run(tCTF_Idx_Tensor<dtype>* output, dtype  beta);

968 969 970 971 972 973 974 975
    /*operator tCTF_Term<dtype>* (){
      tCTF_Idx_Tensor * tsr = new tCTF_Idx_Tensor(*this);
      return tsr;
    }*/
    /**
     * \brief figures out what world this term lives on
     */
    tCTF_World<dtype> * where_am_i() const;
976 977
};

978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146

/**
 * \brief element-wise function for scaling the elements of a tensor via the scale() function
 */
template<typename dtype>
class tCTF_fscl {
  public:
    /**
     * \brief function signature for element-wise scale operation
     * \param[in] alpha scaling value, defined in scale call 
     *            but subject to internal change due to symmetry
     * \param[in,out] a element from tensor A
     **/
    void  (*func_ptr)(dtype   alpha, 
                      dtype & a);
  public:
    tCTF_fscl() { func_ptr = NULL; }
};
/**
 * \brief Interface for custom element-wise function for tensor summation to be used with sum() call on tensors
 */
template<typename dtype>
class tCTF_fsum {
  public:
    /**
     * \brief function signature for element-wise summation operation
     * \param[in] alpha scaling value, defined in summation call 
     *            but subject to internal change due to symmetry
     * \param[in] a element from summand tensor A
     * \param[in,out] b element from summand tensor B
     **/
    void  (*func_ptr)(dtype   alpha, 
                      dtype   a,
                      dtype  &b);
  public:
    tCTF_fsum() { func_ptr = NULL; }
};

/**
 * \brief Interface for custom element-wise function for tensor contraction to be used with contract() call on tensors
 */
template<typename dtype>
class tCTF_fctr {
  public:
    /**
     * \brief function signature for element-wise contraction operation
     * \param[in] alpha scaling value, defined in contraction call 
     *            but subject to internal change due to symmetry
     * \param[in] a element from contraction tensor A
     * \param[in] b element from contraction tensor B
     * \param[in,out] c element from contraction tensor C
     **/
    void  (*func_ptr)(dtype  alpha, 
                      dtype  a, 
                      dtype  b,
                      dtype &c);
  public:
    tCTF_fctr() { func_ptr = NULL; }
};


/**
 * \brief a sparse subset of a tensor 
 */
template<typename dtype>
class tCTF_Sparse_Tensor {
  public:
    tCTF_Tensor<dtype> * parent;
    std::vector<long_int> indices;
    dtype scale;

    /** 
      * \brief base constructor 
      */
    tCTF_Sparse_Tensor();
    
    /**
     * \brief initialize a tensor which corresponds to a set of indices 
     * \param[in] indices a vector of global indices to tensor values
     * \param[in] parent dense distributed tensor to which this sparse tensor belongs to
     */
    tCTF_Sparse_Tensor(std::vector<long_int> indices,
                       tCTF_Tensor<dtype> * parent);

    /**
     * \brief initialize a tensor which corresponds to a set of indices 
     * \param[in] number of values this sparse tensor will have locally
     * \param[in] indices an array of global indices to tensor values
     * \param[in] parent dense distributed tensor to which this sparse tensor belongs to
     */
    tCTF_Sparse_Tensor(long_int              n,
                       long_int *            indices,
                       tCTF_Tensor<dtype> * parent);

    /**
     * \brief set the sparse set of indices on the parent tensor to values
     *        forall(j) i = indices[j]; parent[i] = beta*parent[i] + alpha*values[j];
     * \param[in] alpha scaling factor on values array 
     * \param[in] values data, should be of same size as the number of indices (n)
     * \param[in] beta scaling factor to apply to previously existing data
     */
    void write(dtype              alpha, 
               dtype *            values,
               dtype              beta); 

    // C++ overload special-cases of above method
    void operator=(std::vector<dtype> values); 
    void operator+=(std::vector<dtype> values); 
    void operator-=(std::vector<dtype> values); 
    void operator=(dtype * values); 
    void operator+=(dtype * values); 
    void operator-=(dtype * values); 

    /**
     * \brief read the sparse set of indices on the parent tensor to values
     *        forall(j) i = indices[j]; values[j] = alpha*parent[i] + beta*values[j];
     * \param[in] alpha scaling factor on parent array 
     * \param[in] values data, should be preallocated to the same size as the number of indices (n)
     * \param[in] beta scaling factor to apply to previously existing data in values
     */
    void read(dtype              alpha, 
              dtype *            values,
              dtype              beta); 

    // C++ overload special-cases of above method
    operator std::vector<dtype>();
    operator dtype*();
};



/* these typedefs yield a non-tempalated interface for double and complex<double> */
// \brief a world for double precision tensor types 
typedef tCTF<double>                        CTF;
typedef tCTF_Idx_Tensor<double>             CTF_Idx_Tensor;
typedef tCTF_Tensor<double>                 CTF_Tensor;
typedef tCTF_Sparse_Tensor<double>          CTF_Sparse_Tensor;
typedef tCTF_Matrix<double>                 CTF_Matrix;
typedef tCTF_Vector<double>                 CTF_Vector;
typedef tCTF_Scalar<double>                 CTF_Scalar;
typedef tCTF_World<double>                  CTF_World;
typedef tCTF_fscl<double>                   CTF_fscl;
typedef tCTF_fsum<double>                   CTF_fsum;
typedef tCTF_fctr<double>                   CTF_fctr;
#ifdef CTF_COMPLEX
// \brief a world for complex double precision tensor types 
typedef tCTF< std::complex<double> >        cCTF;
typedef tCTF_Idx_Tensor< std::complex<double> > cCTF_Idx_Tensor;
typedef tCTF_Tensor< std::complex<double> > cCTF_Tensor;
typedef tCTF_Sparse_Tensor< std::complex<double> > cCTF_Sparse_Tensor;
typedef tCTF_Matrix< std::complex<double> > cCTF_Matrix;
typedef tCTF_Vector< std::complex<double> > cCTF_Vector;
typedef tCTF_Scalar< std::complex<double> > cCTF_Scalar;
typedef tCTF_World< std::complex<double> >  cCTF_World;
typedef tCTF_fscl< std::complex<double> >   cCTF_fscl;
typedef tCTF_fsum< std::complex<double> >   cCTF_fsum;
typedef tCTF_fctr< std::complex<double> >   cCTF_fctr;
#endif

/**
 * @}
 */

/**
 * \defgroup expression Tensor expression compiler
 * @{
 */


1147
/**
1148
 * \brief a term is an abstract object representing some expression of tensors
1149 1150 1151 1152
 */
template<typename dtype>
class tCTF_Term {
  public:
1153 1154 1155 1156
    dtype scale;
   
    tCTF_Term();
    virtual ~tCTF_Term(){};
1157 1158

    /**
1159
     * \brief base classes must implement this copy function to retrieve pointer
1160
     */ 
Ducky's avatar
Ducky committed
1161
    virtual tCTF_Term * clone(std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL) const = 0;
1162
    
1163 1164 1165
    /**
     * \brief evalues the expression, which just scales by default
     * \param[in,out] output tensor to write results into and its indices
1166
     */
1167
    virtual void execute(tCTF_Idx_Tensor<dtype> output) const = 0;
1168
    
solomon's avatar
solomon committed
1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183
    /**
     * \brief estimates the cost of a contraction/sum/.. term
     * \param[in] output tensor to write results into and its indices
     */
    virtual long_int estimate_cost(tCTF_Idx_Tensor<dtype> output) const = 0;
    
    /**
     * \brief estimates the cost the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param\[in,out] cost the cost of the operatiob
     * \return output tensor to write results into and its indices
     */
    virtual tCTF_Idx_Tensor<dtype> estimate_cost(long_int & cost) const = 0;
    
    
1184
    /**
1185 1186 1187
     * \brief evalues the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param[in,out] output tensor to write results into and its indices
1188
     */
1189
    virtual tCTF_Idx_Tensor<dtype> execute() const = 0;
1190
    
1191
    /**
Ducky's avatar
Ducky committed
1192 1193
    * \brief appends the tensors this depends on to the input set
    */
Ducky's avatar
Ducky committed
1194
    virtual void get_inputs(std::set<tCTF_Tensor<dtype>*, tensor_tid_less<dtype> >* inputs_set) const = 0;
1195

1196
    /**
1197 1198 1199
     * \brief constructs a new term which multiplies by tensor A
     * \param[in] A term to multiply by
     */
1200
    tCTF_Contract_Term<dtype> operator*(tCTF_Term<dtype> const & A) const;
1201 1202 1203 1204 1205
    
    /**
     * \brief constructs a new term by addition of two terms
     * \param[in] A term to add to output
     */
1206
    tCTF_Sum_Term<dtype> operator+(tCTF_Term<dtype> const & A) const;
1207 1208 1209 1210 1211
    
    /**
     * \brief constructs a new term by subtracting term A
     * \param[in] A subtracted term
     */
1212
    tCTF_Sum_Term<dtype> operator-(tCTF_Term<dtype> const & A) const;
1213 1214 1215 1216 1217 1218 1219 1220 1221 1222
    
    /**
     * \brief A = B, compute any operations on operand B and set
     * \param[in] B tensor on the right hand side
     */
    void operator=(tCTF_Term<dtype> const & B) { execute() = B; };
    void operator=(tCTF_Idx_Tensor<dtype> const & B) { execute() = B; };
    void operator+=(tCTF_Term<dtype> const & B) { execute() += B; };
    void operator-=(tCTF_Term<dtype> const & B) { execute() -= B; };
    void operator*=(tCTF_Term<dtype> const & B) { execute() *= B; };
1223 1224 1225 1226

    /**
     * \brief multiples by a constant
     * \param[in] scl scaling factor to multiply term by
1227
     */
1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238
    tCTF_Contract_Term<dtype> operator*(dtype scl) const;

    /**
     * \brief figures out what world this term lives on
     */
    virtual tCTF_World<dtype> * where_am_i() const = 0;

    /**
     * \brief casts into a double if dimension of evaluated expression is 0
     */
    operator dtype() const;
1239
};
1240

1241
template<typename dtype>
1242
class tCTF_Sum_Term : public tCTF_Term<dtype> {
1243
  public:
1244 1245 1246 1247 1248 1249 1250 1251 1252
    std::vector< tCTF_Term<dtype>* > operands;

    // default constructor
    tCTF_Sum_Term() : tCTF_Term<dtype>() {}

    // destructor frees operands
    ~tCTF_Sum_Term();
  
    // copy constructor
Ducky's avatar
Ducky committed
1253 1254
    tCTF_Sum_Term(tCTF_Sum_Term<dtype> const & other,
        std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL);
1255 1256

    // dervied clone calls copy constructor
Ducky's avatar
Ducky committed
1257
    tCTF_Term<dtype>* clone(std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL) const;
1258 1259

    /**
1260 1261 1262 1263 1264 1265 1266 1267
     * construct sum term corresponding to a single tensor
     * \param[in] output tensor to write results into and its indices
     */ 
    //tCTF_Sum_Term<dtype>(tCTF_Idx_Tensor<dtype> const & tsr);

    /**
     * \brief evalues the expression by summing operands into output
     * \param[in,out] output tensor to write results into and its indices
1268
     */
1269
    void execute(tCTF_Idx_Tensor<dtype> output) const;
1270

solomon's avatar
solomon committed
1271
  
Edgar Solomonik's avatar
Edgar Solomonik committed
1272
    /**
1273 1274 1275
     * \brief evalues the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param[in,out] output tensor to write results into and its indices
Edgar Solomonik's avatar
Edgar Solomonik committed
1276
     */
1277
    tCTF_Idx_Tensor<dtype> execute() const;
1278
    
solomon's avatar
solomon committed
1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293
    /**
     * \brief estimates the cost of a sum term
     * \param[in] output tensor to write results into and its indices
     */
    long_int estimate_cost(tCTF_Idx_Tensor<dtype> output) const;
    
    /**
     * \brief estimates the cost the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param[in,out] output tensor to write results into and its indices
     */
    tCTF_Idx_Tensor<dtype> estimate_cost(long_int & cost) const;
    
    
    
1294
    /**
Ducky's avatar
Ducky committed
1295 1296
    * \brief appends the tensors this depends on to the input set
    */
Ducky's avatar
Ducky committed
1297
    void get_inputs(std::set<tCTF_Tensor<dtype>*, tensor_tid_less<dtype> >* inputs_set) const;
1298

1299 1300 1301 1302
    /**
     * \brief constructs a new term by addition of two terms
     * \param[in] A term to add to output
     */
1303
    tCTF_Sum_Term<dtype> operator+(tCTF_Term<dtype> const & A) const;
1304 1305 1306 1307 1308
    
    /**
     * \brief constructs a new term by subtracting term A
     * \param[in] A subtracted term
     */
1309
    tCTF_Sum_Term<dtype> operator-(tCTF_Term<dtype> const & A) const;
1310

1311 1312 1313 1314
    /**
     * \brief figures out what world this term lives on
     */
    tCTF_World<dtype> * where_am_i() const;
1315 1316
};

1317 1318 1319 1320 1321 1322 1323 1324
template<typename dtype> static
tCTF_Contract_Term<dtype> operator*(double d, tCTF_Term<dtype> const & tsr){
  return (tsr*d);
}

/**
 * \brief An experession representing a contraction of a set of tensors contained in operands 
 */
1325
template<typename dtype>
1326
class tCTF_Contract_Term : public tCTF_Term<dtype> {
1327
  public:
1328 1329
    std::vector< tCTF_Term<dtype>* > operands;

1330
    // \brief default constructor
1331 1332
    tCTF_Contract_Term() : tCTF_Term<dtype>() {}

1333
    // \brief destructor frees operands
1334 1335
    ~tCTF_Contract_Term();
  
1336
    // \brief copy constructor
Ducky's avatar
Ducky committed
1337 1338
    tCTF_Contract_Term(tCTF_Contract_Term<dtype> const & other,
        std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL);
1339

1340
    // \brief dervied clone calls copy constructor
Ducky's avatar
Ducky committed
1341
    tCTF_Term<dtype> * clone(std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL) const;
1342

1343
    /**
1344
     * \brief override execution to  to contract operands and add them to output
1345 1346
     * \param[in,out] output tensor to write results into and its indices
     */
1347
    void execute(tCTF_Idx_Tensor<dtype> output) const;
1348
    
1349
    /**
Ducky's avatar
Ducky committed
1350 1351
    * \brief appends the tensors this depends on to the input set
    */
Ducky's avatar
Ducky committed
1352
    void get_inputs(std::set<tCTF_Tensor<dtype>*, tensor_tid_less<dtype> >* inputs_set) const;
1353

1354 1355 1356 1357 1358
    /**
     * \brief evalues the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param[in,out] output tensor to write results into and its indices
     */
1359
    tCTF_Idx_Tensor<dtype> execute() const;
1360
    
solomon's avatar
solomon committed
1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374
    /**
     * \brief estimates the cost of a contract term
     * \param[in] output tensor to write results into and its indices
     */
    long_int estimate_cost(tCTF_Idx_Tensor<dtype> output) const;
    
    /**
     * \brief estimates the cost the expression to produce an intermediate with 
     *        all expression indices remaining
     * \param[in,out] output tensor to write results into and its indices
     */
    tCTF_Idx_Tensor<dtype> estimate_cost(long_int & cost) const;
    
    
1375 1376 1377 1378
    /**
     * \brief override contraction to grow vector rather than create recursive terms
     * \param[in] A term to multiply by
     */
1379 1380 1381 1382 1383 1384
    tCTF_Contract_Term<dtype> operator*(tCTF_Term<dtype> const & A) const;

    /**
     * \brief figures out what world this term lives on
     */
    tCTF_World<dtype> * where_am_i() const;
1385 1386
};
/**
1387
 * @}
1388 1389
 */

Ducky's avatar
Ducky committed
1390 1391 1392 1393
/**
 * \defgroup scheduler Dynamic scheduler.
 * @{
 */
Ducky's avatar
Ducky committed
1394
enum tCTF_TensorOperationTypes {
1395
  TENSOR_OP_NONE,
Ducky's avatar
Ducky committed
1396 1397 1398 1399
  TENSOR_OP_SET,
  TENSOR_OP_SUM,
  TENSOR_OP_SUBTRACT,
  TENSOR_OP_MULTIPLY };
Ducky's avatar
Ducky committed
1400

1401 1402 1403
/**
 * \brief Provides a untemplated base class for tensor operations.
 */
Ducky's avatar
Ducky committed
1404
class tCTF_TensorOperationBase {
Ducky's avatar
Ducky committed
1405 1406
public:
  virtual ~tCTF_TensorOperationBase() {}
Ducky's avatar
Ducky committed
1407 1408
};

1409 1410 1411 1412 1413
/**
 * \brief A tensor operation, containing all the data (op, lhs, rhs) required
 * to run it. Also provides methods to get a list of inputs and outputs, as well
 * as successor and dependency information used in scheduling.
 */
Ducky's avatar
Ducky committed
1414
template<typename dtype>
Ducky's avatar
Ducky committed
1415
class tCTF_TensorOperation : public tCTF_TensorOperationBase {
Ducky's avatar
Ducky committed
1416 1417 1418 1419 1420 1421
public:
	/**
	 * \brief Constructor, create the tensor operation lhs op= rhs
	 */
	tCTF_TensorOperation(tCTF_TensorOperationTypes op,
			tCTF_Idx_Tensor<dtype>* lhs,
Ducky's avatar
Ducky committed
1422
			const tCTF_Term<dtype>* rhs) :
solomon's avatar
solomon committed
1423
			  dependency_count(0),
Ducky's avatar
Ducky committed
1424 1425
			  op(op),
			  lhs(lhs),
1426
			  rhs(rhs),
Ducky's avatar
Ducky committed
1427
			  cached_estimated_cost(0) {}
Ducky's avatar
Ducky committed
1428

Ducky's avatar
Ducky committed
1429 1430 1431
  /**
   * \brief appends the tensors this writes to to the input set
   */
Ducky's avatar
Ducky committed
1432
  void get_outputs(std::set<tCTF_Tensor<dtype>*, tensor_tid_less<dtype> >* outputs_set) const;
Ducky's avatar
Ducky committed
1433 1434

	/**
Ducky's avatar
Ducky committed
1435 1436
	 * \brief appends the tensors this depends on (reads from, including the output
	 * if a previous value is required) to the input set
Ducky's avatar
Ducky committed
1437
	 */
Ducky's avatar
Ducky committed
1438
	void get_inputs(std::set<tCTF_Tensor<dtype>*, tensor_tid_less<dtype> >* inputs_set) const;
Ducky's avatar
Ducky committed
1439

1440 1441
	/**
	 * \brief runs this operation, but does NOT handle dependency scheduling
Ducky's avatar
Ducky committed
1442
	 * optionally takes a remapping of tensors
1443
	 */
Ducky's avatar
Ducky committed
1444
	void execute(std::map<tCTF_Tensor<dtype>*, tCTF_Tensor<dtype>*>* remap = NULL);
Ducky's avatar
Ducky committed
1445

1446 1447 1448 1449 1450
	/**
	 *\brief provides an estimated runtime cost
	 */
	long_int estimate_cost();

Ducky's avatar
Ducky committed
1451 1452 1453 1454
	bool is_dummy() {
	  return op == TENSOR_OP_NONE;
	}

1455 1456 1457
  /**
   * Schedule Recording Variables
   */
1458
	// Number of dependencies I have
1459
  int dependency_count;
1460
  // List of all successors - operations that depend on me
1461
  std::vector<tCTF_TensorOperation<dtype>* > successors;
1462
  std::vector<tCTF_TensorOperation<dtype>* > reads;
1463

1464 1465 1466 1467 1468
  /**
   * Schedule Execution Variables
   */
  int dependency_left;

Ducky's avatar
Ducky committed
1469 1470 1471 1472 1473 1474 1475
  /**
   * Debugging Helpers
   */
  const char* name() {
    return lhs->parent->name;
  }

Ducky's avatar
Ducky committed
1476 1477 1478
protected:
	tCTF_TensorOperationTypes op;
	tCTF_Idx_Tensor<dtype>* lhs;
Ducky's avatar
Ducky committed
1479
	const tCTF_Term<dtype>* rhs;
Ducky's avatar
Ducky committed
1480 1481

	long_int cached_estimated_cost;
Ducky's avatar
Ducky committed
1482 1483 1484 1485
};

// untemplatized scheduler abstract base class to assist in global operations
class tCTF_ScheduleBase {
Ducky's avatar
Ducky committed
1486
public:
Ducky's avatar
Ducky committed
1487 1488 1489 1490 1491
	virtual void add_operation(tCTF_TensorOperationBase* op) = 0;
};

extern tCTF_ScheduleBase* global_schedule;

Ducky's avatar
Ducky committed
1492 1493 1494
struct tCTF_ScheduleTimer {
  double comm_down_time;
  double exec_time;
Ducky's avatar
Ducky committed
1495 1496
  double imbalance_wall_time;
  double imbalance_acuum_time;
Ducky's avatar
Ducky committed
1497 1498 1499 1500 1501 1502
  double comm_up_time;
  double total_time;

  tCTF_ScheduleTimer():
    comm_down_time(0),
    exec_time(0),
Ducky's avatar
Ducky committed
1503 1504
    imbalance_wall_time(0),
    imbalance_acuum_time(0),
Ducky's avatar
Ducky committed
1505 1506 1507 1508 1509 1510
    comm_up_time(0),
    total_time(0) {}

  void operator+=(tCTF_ScheduleTimer const & B) {
    comm_down_time += B.comm_down_time;
    exec_time += B.exec_time;
Ducky's avatar
Ducky committed
1511 1512
    imbalance_wall_time += B.imbalance_wall_time;
    imbalance_acuum_time += B.imbalance_acuum_time;
Ducky's avatar
Ducky committed
1513 1514 1515 1516 1517
    comm_up_time += B.comm_up_time;
    total_time += B.total_time;
  }
};

Ducky's avatar
Ducky committed
1518 1519 1520
template<typename dtype>
class tCTF_Schedule : public tCTF_ScheduleBase {
public:
Ducky's avatar
Ducky committed
1521 1522 1523 1524 1525
  /**
   * \brief Constructor, optionally specifying a world to restrict processor
   * allocations to
   */
  tCTF_Schedule(tCTF_World<dtype>* world = NULL) :
Ducky's avatar
Ducky committed
1526 1527
    world(world),
    partitions(0) {}
Ducky's avatar
Ducky committed
1528

Ducky's avatar
Ducky committed
1529 1530 1531 1532 1533 1534 1535 1536 1537
	/**
	 * \brief Starts recording all tensor operations to this schedule
	 * (instead of executing them immediately)
	 */
	void record();

	/**
	 * \brief Executes the schedule and implicitly terminates recording
	 */
Ducky's avatar
Ducky committed
1538
	tCTF_ScheduleTimer execute();
Ducky's avatar
Ducky committed
1539

Ducky's avatar
Ducky committed
1540 1541 1542 1543
  /**
   * \brief Executes a slide of the ready_queue, partitioning it among the
   * processors in the grid
   */
Ducky's avatar
Ducky committed
1544
  inline tCTF_ScheduleTimer partition_and_execute();
Ducky's avatar
Ducky committed
1545

1546
	/**
Ducky's avatar
Ducky committed
1547
	 * \brief Call when a tensor op finishes, this adds newly enabled ops to the ready queue
1548
	 */
Ducky's avatar
Ducky committed
1549
	inline void schedule_op_successors(tCTF_TensorOperation<dtype>* op);
1550

Ducky's avatar
Ducky committed
1551 1552 1553 1554 1555 1556 1557 1558
	/**
	 * \brief Adds a tensor operation to this schedule.
	 * THIS IS CALL ORDER DEPENDENT - operations will *appear* to execute
	 * sequentially in the order they were added.
	 */
	void add_operation_typed(tCTF_TensorOperation<dtype>* op);
	void add_operation(tCTF_TensorOperationBase* op);

Ducky's avatar
Ducky committed
1559 1560 1561 1562 1563 1564 1565
	/**
	 * Testing functionality
	 */
	void set_max_partitions(int in_partitions) {
	  partitions = in_partitions;
	}

Ducky's avatar
Ducky committed
1566
protected:
Ducky's avatar
Ducky committed
1567 1568
	tCTF_World<dtype>* world;

Ducky's avatar
Ducky committed
1569 1570 1571
	/**
	 * Internal scheduling operation overview:
	 * DAG Structure:
1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582
	 *  Each task maintains:
	 *    dependency_count: the number of dependencies that the task has
	 *    dependency_left: the number of dependencies left before this task can
	 *      execute
	 *    successors: a vector of tasks which has this as a dependency
	 *  On completing a task, it decrements the dependency_left of all
	 *  successors. Once the count reaches zero, the task is added to the ready
	 *  queue and can be scheduled for execution.
	 *  To allow one schedule to be executed many times, dependency_count is
	 *  only modified by recording tasks, and is copied to dependency_left when
	 *  the schedule starts executing.
Ducky's avatar
Ducky committed
1583 1584
	 *
	 * DAG Construction:
1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595
	 *  A map from tensors pointers to operations is maintained, which contains
	 *  the latest operation that writes to a tensor.
	 *  When a new operation is added, it checks this map for all dependencies.
	 *  If a dependency has no entry yet, then it is considered satisfied.
	 *  Otherwise, it depends on the current entry - and the latest write
	 *  operation adds this task as a successor.
	 *  Then, the latest_write for this operation is updated.
	 */

	/**
	 * Schedule Recording Variables
Ducky's avatar
Ducky committed
1596
	 */
1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610
	// Tasks with no dependencies, which can be executed at the start
	std::deque<tCTF_TensorOperation<dtype>*> root_tasks;

  // For debugging purposes - the steps in the original input order
  std::deque<tCTF_TensorOperation<dtype>*> steps_original;

  // Last operation writing to the key tensor
  std::map<tCTF_Tensor<dtype>*, tCTF_TensorOperation<dtype>*> latest_write;

  /**
   * Schedule Execution Variables
   */
  // Ready queue of tasks with all dependencies satisfied
  std::deque<tCTF_TensorOperation<dtype>*> ready_tasks;
Ducky's avatar
Ducky committed
1611

Ducky's avatar
Ducky committed
1612 1613 1614 1615 1616
  /**
   * Testing variables
   */
  int partitions;

Ducky's avatar
Ducky committed
1617 1618 1619 1620 1621 1622
};
/**
 * @}
 */


1623
#define MAX_NAME_LENGTH 53
solomon's avatar
solomon committed
1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648
    
class CTF_Function_timer{
  public:
    char name[MAX_NAME_LENGTH];
    double start_time;
    double start_excl_time;
    double acc_time;
    double acc_excl_time;
    int calls;

    double total_time;
    double total_excl_time;
    int total_calls;

  public: 
    CTF_Function_timer(char const * name_, 
                   double const start_time_,
                   double const start_excl_time_);
    void compute_totals(MPI_Comm comm);
    bool operator<(CTF_Function_timer const & w) const ;
    void print(FILE *         output, 
               MPI_Comm const comm, 
               int const      rank,
               int const      np);
};
1649

1650

1651 1652 1653
/**
 * \defgroup timer Timing and cost measurement
 * @{
1654 1655
 *//**
 * \brief local process walltime measurement
1656
 */
1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671
class CTF_Timer{
  public:
    char const * timer_name;
    int index;
    int exited;
    int original;
  
  public:
    CTF_Timer(char const * name);
    ~CTF_Timer();
    void stop();
    void start();
    void exit();
    
};
1672

1673 1674 1675 1676 1677 1678 1679 1680
/**
 * \brief epoch during which to measure timers
 */
class CTF_Timer_epoch{
  private:
    CTF_Timer * tmr_inner;
    CTF_Timer * tmr_outer;
    std::vector<CTF_Function_timer> saved_function_timers;
1681 1682
    double save_excl_time;
    double save_complete_time; 
1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694
  public:
    char const * name;
    //create epoch called name
    CTF_Timer_epoch(char const * name_);
    
    //clears timers and begins epoch
    void begin();

    //prints timers and clears them
    void end();
};

1695

1696 1697 1698
/**
 * \brief a term is an abstract object representing some expression of tensors
 */
1699

solomon's avatar
solomon committed
1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725
/**
 * \brief measures flops done in a code region
 */
class CTF_Flop_Counter{
  public:
    long_int start_count;

  public:
    /**
     * \brief constructor, starts counter
     */
    CTF_Flop_Counter();
    ~CTF_Flop_Counter();

    /**
     * \brief restarts counter
     */
    void zero();

    /**
     * \brief get total flop count over all counters in comm
     */
    long_int count(MPI_Comm comm = MPI_COMM_SELF);

};

1726 1727 1728 1729 1730
/**
 * @}
 */


1731

1732
#endif
1733