diff --git a/.gitignore b/.gitignore index 3e3dd51..04d97a9 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ test/request_tracking/fenix_request_tracking_test test/request_tracking/fenix_request_tracking_test_nofenix build/ install/ +spack-* # Other *~ diff --git a/CMakeLists.txt b/CMakeLists.txt index ecaac8b..b8d6c7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ cmake_minimum_required(VERSION 3.10.2) -project(Fenix C) +project(Fenix C CXX) # The version number. set(FENIX_VERSION_MAJOR 1) set(FENIX_VERSION_MINOR 0) diff --git a/examples/01_hello_world/fenix/CMakeLists.txt b/examples/01_hello_world/fenix/CMakeLists.txt index 6a344f4..b22ccb9 100644 --- a/examples/01_hello_world/fenix/CMakeLists.txt +++ b/examples/01_hello_world/fenix/CMakeLists.txt @@ -10,6 +10,7 @@ add_executable(fenix_hello_world fenix_hello_world.c) target_link_libraries(fenix_hello_world fenix ${MPI_C_LIBRARIES}) +set_target_properties(fenix_hello_world PROPERTIES LINKER_LANGUAGE C) if(BUILD_TESTING) add_test(NAME hello_world diff --git a/examples/01_hello_world/fenix/fenix_hello_world.c b/examples/01_hello_world/fenix/fenix_hello_world.c index 9008ee7..6509a0b 100644 --- a/examples/01_hello_world/fenix/fenix_hello_world.c +++ b/examples/01_hello_world/fenix/fenix_hello_world.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include diff --git a/examples/02_send_recv/fenix/CMakeLists.txt b/examples/02_send_recv/fenix/CMakeLists.txt index bf40679..e88d944 100644 --- a/examples/02_send_recv/fenix/CMakeLists.txt +++ b/examples/02_send_recv/fenix/CMakeLists.txt @@ -9,11 +9,12 @@ # add_executable(fenix_ring fenix_ring.c) -target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES} m ) +target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES}) +set_target_properties(fenix_ring PROPERTIES LINKER_LANGUAGE C) if(BUILD_TESTING) - add_test(NAME ring + add_test(NAME send_recv COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_ring ${MPIEXEC_POSTFLAGS} 1 2) - set_tests_properties(ring PROPERTIES + set_tests_properties(send_recv PROPERTIES FAIL_REGULAR_EXPRESSION "FAILURE") endif() diff --git a/examples/05_subset_create/subset_create.c b/examples/05_subset_create/subset_create.c index c819318..23c15ca 100644 --- a/examples/05_subset_create/subset_create.c +++ b/examples/05_subset_create/subset_create.c @@ -66,11 +66,12 @@ int max_iter = 2; const int kCount = 100; const int kKillID = 2; +const int my_group = 0; +const int my_member = 0; int main(int argc, char **argv) { -fprintf(stderr, "Started\n"); int i; - int subset[500]; + int subset[kCount]; MPI_Status status; if (argc < 2) { @@ -86,7 +87,6 @@ fprintf(stderr, "Started\n"); int num_ranks; int rank; int error; - int my_group = 0; int my_timestamp = 0; int my_depth = 1; int recovered = 0; @@ -120,24 +120,33 @@ fprintf(stderr, "Started\n"); if (fenix_role == FENIX_ROLE_INITIAL_RANK) { // init my subset data - int index; - for (index = 0; index < kCount; index++) { + for (int index = 0; index < kCount; index++) { subset[index] = -1; } - Fenix_Data_member_create(my_group, 777, subset, kCount, MPI_INT); + Fenix_Data_member_create(my_group, my_member, subset, kCount, MPI_INT); //Store the entire data set for the initial commit. This is not a requirement. - Fenix_Data_member_store(my_group, 777, FENIX_DATA_SUBSET_FULL); + Fenix_Data_member_store(my_group, my_member, FENIX_DATA_SUBSET_FULL); Fenix_Data_commit_barrier(my_group, NULL); } else { //We've had a failure! Time to recover data. 
- fprintf(stderr, "Starting data recovery on node %d\n", rank); - Fenix_Data_member_restore(my_group, 777, subset, kCount, FENIX_TIME_STAMP_MAX, NULL); + fprintf(stderr, "Starting data recovery on rank %d\n", rank); + + //Set all data to a value that was never stored + for (int index = 0; index < kCount; index++) { + subset[index] = -2; + } + + int restore_ret = Fenix_Data_member_restore(my_group, my_member, subset, kCount, FENIX_TIME_STAMP_MAX, NULL); + + if(restore_ret != FENIX_SUCCESS){ + fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, restore_ret); + } int out_flag; - Fenix_Data_member_attr_set(my_group, 777, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, + Fenix_Data_member_attr_set(my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, subset, &out_flag); @@ -159,12 +168,11 @@ fprintf(stderr, "Started\n"); //We'll store only the small subset that we specified, though. //This means that as far as Fenix is concerned only data within that //subset was ever changed from the initialized value of -1 - Fenix_Data_member_store(my_group, 777, subset_specifier); + Fenix_Data_member_store(my_group, my_member, subset_specifier); Fenix_Data_commit_barrier(my_group, NULL); MPI_Barrier(new_comm); //Make sure everyone is done committing before we kill and restart everyone //else we may end up with only some nodes having the commit, and it being unusable - } @@ -172,7 +180,7 @@ fprintf(stderr, "Started\n"); if (rank == kKillID && recovered == 0) { fprintf(stderr, "Doing kill on node %d\n", rank); pid_t pid = getpid(); - kill(pid, SIGTERM); + kill(pid, SIGKILL); } //Make sure we've let rank 2 fail before proceeding, so we're definitely checking @@ -214,6 +222,6 @@ fprintf(stderr, "Started\n"); Fenix_Finalize(); - MPI_Finalize(); + //MPI_Finalize(); return !successful; //return error status } diff --git a/examples/07_resizeable_member/CMakeLists.txt b/examples/07_resizeable_member/CMakeLists.txt new file mode 100644 index 0000000..accefdd --- /dev/null +++ b/examples/07_resizeable_member/CMakeLists.txt @@ -0,0 +1,21 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. +# + +add_executable(resizeable resizeable.cpp) +target_link_libraries(resizeable fenix ${MPI_C_LIBRARIES}) + +target_compile_features(resizeable PRIVATE cxx_std_20) + +if(BUILD_TESTING) + add_test(NAME resizeable + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} resizeable ${MPIEXEC_POSTFLAGS} 1) + set_tests_properties(resizeable PROPERTIES + FAIL_REGULAR_EXPRESSION "FAILURE" LABELS "Example") +endif() diff --git a/examples/07_resizeable_member/resizeable.cpp b/examples/07_resizeable_member/resizeable.cpp new file mode 100644 index 0000000..98b3697 --- /dev/null +++ b/examples/07_resizeable_member/resizeable.cpp @@ -0,0 +1,203 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr int kKillID = 2; +constexpr int my_group = 0; +constexpr int my_member = 0; +constexpr int start_timestamp = 0; +constexpr int group_depth = 1; +int errflag; + +using Fenix::DataSubset; +using namespace Fenix::Data; + +int main(int argc, char **argv) { + MPI_Init(&argc, &argv); + + MPI_Comm res_comm; + Fenix::init({.out_comm = &res_comm, .spares = 1}); + + int num_ranks, rank; + MPI_Comm_size(res_comm, &num_ranks); + MPI_Comm_rank(res_comm, &rank); + + std::vector data; + + bool should_throw = Fenix_get_role() == FENIX_ROLE_RECOVERED_RANK; + while(true) try { + if(should_throw){ + should_throw = false; + Fenix::throw_exception(); + } + + //Initial work and commits + if(Fenix_get_role() == FENIX_ROLE_INITIAL_RANK){ + Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + Fenix_Data_member_create( + my_group, my_member, data.data(), FENIX_RESIZEABLE, MPI_INT + ); + + data.resize(100); + for(int& i : data) i = -1; + + + //Store the whole array first. We need to keep our buffer pointer updated + //since resizing an array can change it + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_store(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + //Now commit a smaller portion with different data. 
+ data.resize(50); + int val = 1; + for(int& i : data) i = val++; + + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_store(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + if(rank == kKillID){ + fprintf(stderr, "Doing kill on node %d\n", rank); + raise(SIGTERM); + } + } + + Fenix_Finalize(); + + + break; + } catch (const Fenix::CommException& e) { + const Fenix::CommException* err = &e; + while(true) try { + //We've had a failure! Time to recover data. + fprintf(stderr, "Starting data recovery on rank %d\n", rank); + if(err->fenix_err != FENIX_SUCCESS){ + fprintf(stderr, "FAILURE on Fenix Init (%d). Exiting.\n", err->fenix_err); + exit(1); + } + + Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + + //Do a null restore to get information about the stored subset + DataSubset stored_subset; + int ret = member_restore( + my_group, my_member, nullptr, 0, FENIX_TIME_STAMP_MAX, stored_subset + ); + if(ret != FENIX_SUCCESS) { + fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, ret); + MPI_Abort(MPI_COMM_WORLD, 1); + } + + //Resize data to fit all stored data + data.resize(stored_subset.max_count()); + + //Set all data to a value that was never stored, just for testing + for(int& i : data) i = -2; + + //Now do an lrestore to get the recovered data. + ret = member_lrestore( + my_group, my_member, data.data(), data.size(), FENIX_TIME_STAMP_MAX, + stored_subset + ); + + break; + } catch (const Fenix::CommException& nested){ + err = &nested; + } + } + + //Ensure data is correct after execution and recovery + bool successful = data.size() == 50; + if(!successful) printf("Rank %d expected data size 50, but got %d\n", rank, data.size()); + + for(int i = 0; i < data.size() && successful; i++){ + successful &= data[i] == i+1; + if(!successful) printf("Rank %d data[%d]=%d, but should be %d!\n", rank, i, data[i], i+1); + } + + if(successful){ + printf("Rank %d successfully recovered\n", rank); + } else { + printf("FAILURE on rank %d\n", rank); + } + + MPI_Finalize(); + return !successful; //return error status +} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b1f7321..faa8411 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(03_reduce/fenix) add_subdirectory(04_Isend_Irecv/fenix) add_subdirectory(05_subset_create) add_subdirectory(06_subset_createv) +add_subdirectory(07_resizeable_member) diff --git a/include/fenix.h b/include/fenix.h index 7a68f82..1a0c626 100644 --- a/include/fenix.h +++ b/include/fenix.h @@ -63,8 +63,8 @@ #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif -#include "fenix_data_subset.h" -#include "fenix_process_recovery.h" + +#include "fenix_init.h" /** * @file @@ -112,8 +112,6 @@ extern "C" { //!@internal @brief Agreement code for data commit barrier #define FENIX_DATA_COMMIT_BARRIER_LOC 4 - - /** * @defgroup ProcessRecovery Process Recovery * @brief Functions for managing process recovery in Fenix. @@ -142,6 +140,30 @@ typedef enum { FENIX_ROLE_SURVIVOR_RANK = 2 } Fenix_Rank_role; +/** + * @brief Options for passing control back to application after recovery. 
+ */ +typedef enum { + //!Return to Fenix_Init via longjmp (default) + FENIX_RESUME_JUMP, + //!Return the error code inline + FENIX_RESUME_RETURN, + //!Throw a Fenix::CommException + FENIX_RESUME_THROW +} Fenix_Resume_mode; + +/** + * @brief Options for dealing with 'unhandled' errors, e.g. invalid rank IDs + */ +typedef enum { + //!Ignore unhandled errors + FENIX_UNHANDLED_SILENT, + //!Print error and continue without handling + FENIX_UNHANDLED_PRINT, + //!Print error and abort Fenix's world (default) + FENIX_UNHANDLED_ABORT +} Fenix_Unhandled_mode; + /** * @fn void Fenix_Init(int* role, MPI_Comm comm, MPI_Comm* newcomm, int** argc, char*** argv, int spare_ranks, int spawn, MPI_Info info, int* error); * @brief Build a resilient communicator and set the restart point. @@ -193,14 +215,13 @@ typedef enum { * @param[in] spawn *Unimplemented*: Whether to enable spawning new ranks to replace * failed ranks when spares are unavailable. * @param[in] info Fenix recovery configuration parameters, may be MPI_INFO_NULL - * Supports the "FENIX_RESUME_MODE" key, used to indicate where execution should resume upon + * "FENIX_RESUME_MODE" key is used to indicate where execution should resume upon * rank failure for all active (non-spare) ranks in any resilient communicators, not only for - * those ranks in communicators that failed. The following values associated with the - * "resume_mode" key are supported: - * - "Fenix_init" (default): execution resumes at logical exit of Fenix_Init. - * - "NO_JUMP": execution continues from the failing MPI call. Errors are otherwise handled - * as normal, but return the error code as well. Applications should typically - * either check for return codes or assign an error callback through Fenix. + * those ranks in communicators that failed. The value should be a string with the name of a + * Fenix_Resume_mode enum value. + * "FENIX_UNHANDLED_MODE" key is used to indicate how Fenix should handle error values + * returned by MPI functions that are unrelated to failed processes. The value should be + * a string with the name of a Fenix_Unhandled_mode enum value. * @param[out] error The return status of \c Fenix_Init
* Used to signal that a non-fatal error or special condition was encountered in the execution of * Fenix_Init, or FENIX_SUCCESS otherwise. It has the same value across all ranks released by @@ -217,10 +238,8 @@ typedef enum { *(_role) = __fenix_preinit(_role, _comm, _newcomm, _argc, \ _argv, _spare_ranks, _spawn, _info, \ _error, &bufjmp); \ - if(setjmp(bufjmp)) { \ - *(_role) = FENIX_ROLE_SURVIVOR_RANK; \ - } \ - __fenix_postinit( _error ); \ + setjmp(bufjmp); \ + __fenix_postinit(); \ } @@ -260,6 +279,11 @@ int Fenix_Callback_register(void (*recover)(MPI_Comm, int, void *), */ int Fenix_Callback_pop(); +/** + * @brief Invoke all callbacks with information from the last recovered fault + */ +void Fenix_Callback_invoke_all(); + /** * @brief Check for any failed ranks * @@ -273,7 +297,16 @@ int Fenix_Process_detect_failures(int do_recovery); int Fenix_get_number_of_ranks_with_role(int, int *); //!@unimplemented Returns the #Fenix_Rank_role for a given rank -int Fenix_get_role(MPI_Comm comm, int rank, int *role); +int Fenix_get_rank_role(MPI_Comm comm, int rank, int *role); + +//!@brief Returns this rank's #Fenix_Rank_role +Fenix_Rank_role Fenix_get_role(); + +//!@brief Returns the error value from Fenix_Init or the latest recovery +int Fenix_get_error(); + +//!@brief Returns the number of spare ranks currently available to Fenix +int Fenix_get_nspare(); /** * @brief Get the list of ranks that failed in the most recent failure. @@ -332,9 +365,13 @@ int Fenix_Finalize(); #define FENIX_DATA_MEMBER_ATTRIBUTE_SIZE 14 #define FENIX_DATA_SNAPSHOT_LATEST -1 #define FENIX_DATA_SNAPSHOT_ALL 16 +#define FENIX_RESIZEABLE 0 #define FENIX_DATA_SUBSET_CREATED 2 #define FENIX_DATA_POLICY_IN_MEMORY_RAID 13 +#define FENIX_DATA_POLICY_IMR FENIX_DATA_POLICY_IN_MEMORY_RAID + +#define FENIX_TIME_STAMP_IGNORE NULL /** * @unimplemented As MPI_Request, but for Fenix asynchronous data recovery calls @@ -344,12 +381,29 @@ typedef struct { MPI_Request mpi_recv_req; } Fenix_Request; + +/** + * @brief Represents a data subset that can be stored/recovered + * + * Must be initialized (via #Fenix_Data_subset_create or + * #Fenix_Data_subset_createv) before using as an input parameter. + * + * Must be uninitialized or freed (#Fenix_Data_subset_free) before using as an + * output parameter to avoid data leaks. + */ +typedef struct { + //!@internal @brief pointer to a Fenix::DataSubset object + void* impl; +} Fenix_Data_subset; + + //!@brief A standin for checkpointing/recovering all available data in a member. extern const Fenix_Data_subset FENIX_DATA_SUBSET_FULL; //!@brief A standin for checkpointing/recovering none of the available data in a member. extern const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY; +extern Fenix_Data_subset* FENIX_DATA_SUBSET_IGNORE; /** * @brief Create a Data Group @@ -403,7 +457,7 @@ int Fenix_Data_group_create(int group_id, MPI_Comm comm, int start_time_stamp, * is critical for non-survivor ranks after a failure which will have an invalid address * which was generated on the failed rank and must update. * @param count The maximum number of contiguous elements of type \c datatype of the data to be - * stored. Need not be the same in all calling ranks. + * stored. A value of FENIX_RESIZEABLE allows this member to have a varying data size. * @param datatype The MPI_Datatype of the elements in \c source_buffer * * @return FENIX_SUCCESS, or an error value. 
@@ -443,24 +497,25 @@ int Fenix_Data_test(Fenix_Request request, int *flag); * @param member_id All ranks must provide the same member_id * @param subset_specifier Which subset of the data to store. It is always valid for every rank to provide the same * subset_specifier; depending on the group's policy, varying combinations of specifiers may be possible. + * If this member was created with size FENIX_RESIZEABLE, FENIX_DATA_SUBSET_ALL is an invalid input. * @return FENIX_SUCCESS, or an error value. */ int Fenix_Data_member_store(int group_id, int member_id, - Fenix_Data_subset subset_specifier); + const Fenix_Data_subset subset_specifier); //!@unimplemented As [store](#Fenix_Data_member_store), but subsets may vary rank-to-rank. int Fenix_Data_member_storev(int group_id, int member_id, - Fenix_Data_subset subset_specifier); + const Fenix_Data_subset subset_specifier); //!@unimplemented As [store](#Fenix_Data_member_store), but asynchronous. int Fenix_Data_member_istore(int group_id, int member_id, - Fenix_Data_subset subset_specifier, + const Fenix_Data_subset subset_specifier, Fenix_Request *request); //!@unimplemented As [istore](#Fenix_Data_member_istore), but asynchronous. int Fenix_Data_member_istorev(int group_id, int member_id, - Fenix_Data_subset subset_specifier, + const Fenix_Data_subset subset_specifier, Fenix_Request *request); /** @@ -606,10 +661,23 @@ int Fenix_Data_subset_createv(int num_blocks, int *array_start_offsets, */ int Fenix_Data_subset_delete(Fenix_Data_subset *subset_specifier); -//!@unimplemented Get the number of members in a data group. +/** + * @brief Get the number of members in a data group. + * + * @param[in] group_id The group to query + * @param[out] number_of_members Number of members in the group + */ int Fenix_Data_group_get_number_of_members(int group_id, int *number_of_members); -//!@unimplemented Get member ID based on member index +/** + * @brief Get member ID based on member index + * + * See #Fenix_Data_group_get_number_of_members + * + * @param[in] group_id The group to query + * @param[out] member_id The member id at this index in the group + * @param[in] position The position to check, [0, number_of_members) + */ int Fenix_Data_group_get_member_at_position(int group_id, int *member_id, int position); @@ -695,6 +763,8 @@ int Fenix_Data_member_delete(int group_id, int member_id); #if defined(c_plusplus) || defined(__cplusplus) } + +#include "fenix.hpp" #endif #endif // __FENIX__ diff --git a/include/fenix.hpp b/include/fenix.hpp new file mode 100644 index 0000000..85acfca --- /dev/null +++ b/include/fenix.hpp @@ -0,0 +1,229 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef __FENIX_HPP__ +#define __FENIX_HPP__ + +#include +#include +#include +#include +#include "fenix.h" +#include "fenix_exception.hpp" +#include "fenix_data_subset.hpp" + +/** + * @brief As the C-style callback, but accepts an std::function and does not use the void* pointer. + * + * @param[in] callback The function to register. + * + * @returnstatus + */ +int Fenix_Callback_register(std::function callback); + +namespace Fenix { + +using Role = Fenix_Rank_role; +constexpr Role INITIAL_RANK = FENIX_ROLE_INITIAL_RANK; +constexpr Role RECOVERED_RANK = FENIX_ROLE_RECOVERED_RANK; +constexpr Role SURVIVOR_RANK = FENIX_ROLE_SURVIVOR_RANK; + +using ResumeMode = Fenix_Resume_mode; +constexpr ResumeMode JUMP = FENIX_RESUME_JUMP; +constexpr ResumeMode RETURN = FENIX_RESUME_RETURN; +constexpr ResumeMode THROW = FENIX_RESUME_THROW; + +enum CallbackExceptionMode { + RETHROW, + SQUASH +}; + +using UnhandledMode = Fenix_Unhandled_mode; +constexpr UnhandledMode SILENT = FENIX_UNHANDLED_SILENT; +constexpr UnhandledMode PRINT = FENIX_UNHANDLED_PRINT; +constexpr UnhandledMode ABORT = FENIX_UNHANDLED_ABORT; + +namespace Args { +struct FenixInitArgs { + int* role = nullptr; + MPI_Comm in_comm = MPI_COMM_WORLD; + MPI_Comm* out_comm = nullptr; + int* argc = nullptr; + char*** argv = nullptr; + int spares = 0; + int spawn = 0; + ResumeMode resume_mode = THROW; + CallbackExceptionMode callback_exception_mode = RETHROW; + UnhandledMode unhandled_mode = ABORT; + int* err = nullptr; +}; +} + +void init(const Args::FenixInitArgs args); + +//!@brief Throw an exception for the most recent fault. Helpful for spares. 
+void throw_exception(); + +//!@brief Overload of #Fenix_get_role +Fenix_Rank_role role(); + +//!@brief Overload of #Fenix_get_error +int error(); + +//!@brief Overload of #Fenix_get_nspare +int nspare(); + +//!@brief Overload of #Fenix_Callback_register +int callback_register(std::function callback); + +//!@brief Overload of #Fenix_Callback_pop +int callback_pop(); + +//!@brief Overload of #Fenix_Callback_invoke_all +void callback_invoke_all(); + +/** + * @brief Get the failed ranks from the most recent recovery + * @return vector of failed ranks + */ +std::vector fail_list(); + +//!@brief Overload of #Fenix_Process_detect_failures +int detect_failures(bool recover = true); + +//!@brief Overload of #Fenix_Initialized that directly returns true if initialized +bool initialized(); + +} // namespace Fenix + +namespace Fenix::Data { + +extern const DataSubset SUBSET_FULL; +extern const DataSubset SUBSET_EMPTY; +extern DataSubset SUBSET_IGNORE; + +//!@brief Overload of #Fenix_Data_group_create +int group_create( + int group_id, MPI_Comm comm, int start_time_stamp, int depth, + int policy_name, void* policy_value, int* flag +); + +//!@brief Overload of #Fenix_Data_member_create +int member_create( + int group_id, int member_id, void* buffer, int count, MPI_Datatype datatype +); + +//!@brief Overload of #Fenix_Data_member_store +int member_store(int group_id, int member_id, const DataSubset& subset); + +//!@brief Overload of #Fenix_Data_member_storev +int member_storev(int group_id, int member_id, const DataSubset& subset); + +//!@brief Overload of #Fenix_Data_member_istore +int member_istore( + int group_id, int member_id, const DataSubset& subset, + Fenix_Request *request +); + +//!@brief Overload of #Fenix_Data_member_istorev +int member_istorev( + int group_id, int member_id, const DataSubset& subset, + Fenix_Request *request +); + +//!@brief Overload of #Fenix_Data_member_restore +int member_restore( + int group_id, int member_id, void *target_buffer, int max_length, + int time_stamp, DataSubset& data_found +); + +//!@brief Overload of #Fenix_Data_member_lrestore +int member_lrestore( + int group_id, int member_id, void *target_buffer, int max_length, + int time_stamp, DataSubset& data_found +); + +//!@brief Overload of #Fenix_Data_commit +int commit(int group_id, int* time_stamp = nullptr); + +//!@brief Overload of #Fenix_Data_commit_barrier +int commit_barrier(int group_id, int* time_stamp = nullptr); + +/** + * @brief Get the members of a group + * @return vector of member IDs of each member in group_id if group exists + */ +std::optional> group_members(int group_id); + +/** + * @brief Get the snapshots of a group + * @return vector of timestamps of each snapshot in group_id if group exists + */ +std::optional> group_snapshots(int group_id); + +//!@brief Overload of #Fenix_Data_snapshot_delete +int snapshot_delete(int group_id, int timestamp); + +//!@brief Overload of #Fenix_Data_group_delete +int group_delete(int group_id); + +//!@brief Overload of #Fenix_Data_member_delete +int member_delete(int group_id, int member_id); + +} // namespace Fenix::Data + +#endif diff --git a/include/fenix_comm_list.h b/include/fenix_comm_list.hpp similarity index 100% rename from include/fenix_comm_list.h rename to include/fenix_comm_list.hpp diff --git a/include/fenix_data_buffer.hpp b/include/fenix_data_buffer.hpp new file mode 100644 index 0000000..c56de2f --- /dev/null +++ b/include/fenix_data_buffer.hpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// ************************************************************************
+// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef FENIX_DATA_BUFFER_HPP +#define FENIX_DATA_BUFFER_HPP + +#include +#include + +namespace Fenix { +namespace Detail { + +template +struct UninitializedCharAllocator : public std::allocator{ + UninitializedCharAllocator() noexcept {}; + template + UninitializedCharAllocator(const U& other) noexcept {}; + + using value_type = T; + void construct(char*){ }; + + template + struct rebind { + using other = UninitializedCharAllocator; + }; +}; + +using BufferVec = std::vector>; + +} + +class DataBuffer : public Detail::BufferVec { + public: + using BufferVec = Detail::BufferVec; + + void reset(size_t new_size = 0){ + //Clear first, to be sure any re-allocations don't actually move data + clear(); + resize(new_size); + } + + int send(int dst, int tag, MPI_Comm comm){ + return MPI_Send(data(), size(), MPI_BYTE, dst, tag, comm); + } + + //Recv n bytes + int recv( + int n, int src, int tag, MPI_Comm comm, + MPI_Status* status = MPI_STATUS_IGNORE + ) { + reset(n); + return MPI_Recv(data(), n, MPI_BYTE, src, tag, comm, status); + } + + //Recv an unknown amount of data and resize to fit + int recv_unknown( + int src, int tag, MPI_Comm comm, MPI_Status* status = MPI_STATUS_IGNORE + ) { + MPI_Status p_status; + MPI_Probe(src, tag, comm, &p_status); + + int n; + MPI_Get_count(&p_status, MPI_BYTE, &n); + return recv(n, src, tag, comm); + } + +}; + +} + +#endif //FENIX_DATA_BUFFER_HPP diff --git a/include/fenix_data_group.h b/include/fenix_data_group.hpp similarity index 57% rename from include/fenix_data_group.h rename to include/fenix_data_group.hpp index 67cb079..0e84b01 100644 --- a/include/fenix_data_group.h +++ b/include/fenix_data_group.hpp @@ -56,79 +56,24 @@ #ifndef __FENIX_DATA_GROUP_H__ #define __FENIX_DATA_GROUP_H__ +#include + #include #include "fenix.h" -#include "fenix_data_member.h" -#include "fenix_data_packet.h" -#include "fenix_util.h" -#include "fenix_data_subset.h" +#include "fenix_data_member.hpp" +#include "fenix_data_packet.hpp" +#include "fenix_util.hpp" +#include "fenix_data_subset.hpp" #define __FENIX_DEFAULT_GROUP_SIZE 32 -typedef struct __fenix_group_vtbl fenix_group_vtbl_t; -typedef struct __fenix_group fenix_group_t; - -//This defines the functions which must be implemented by the group -typedef struct __fenix_group_vtbl { - int (*group_delete)(fenix_group_t* group); - - int (*member_create)(fenix_group_t* group, fenix_member_entry_t* mentry); - - int (*member_delete)(fenix_group_t* group, int member_id); - - int (*get_redundant_policy)(fenix_group_t*, int* policy_name, - void* policy_value, int* flag); - - int (*member_store)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); - - int (*member_storev)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); - - int (*member_istore)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request); - - int (*member_istorev)(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request); - - int (*commit)(fenix_group_t* group); - - int (*snapshot_delete)(fenix_group_t* group, int time_stamp); - - int (*barrier)(fenix_group_t* group); - - int (*member_restore)(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); - - int (*member_lrestore)(fenix_group_t* 
group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); +namespace Fenix::Data { - int (*member_restore_from_rank)(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - int source_rank); - - int (*get_number_of_snapshots)(fenix_group_t* group, - int* number_of_snapshots); - - int (*get_snapshot_at_position)(fenix_group_t* group, int position, - int* time_stamp); - - int (*reinit)(fenix_group_t* group, int* flag); - - int (*member_get_attribute)(fenix_group_t* group, fenix_member_entry_t* mentry, - int attributename, void* attributevalue, int* flag, int sourcerank); - - int (*member_set_attribute)(fenix_group_t* group, fenix_member_entry_t* mentry, - int attributename, void* attributevalue, int* flag); - -} fenix_group_vtbl_t; +using member_iterator = std::pair; //We keep basic bookkeeping info here, policy specific //information is kept by the policy's data type. -typedef struct __fenix_group { - fenix_group_vtbl_t vtbl; +struct fenix_group_t { int groupid; MPI_Comm comm; int comm_size; @@ -137,13 +82,40 @@ typedef struct __fenix_group { int timestamp; int depth; int policy_name; - fenix_member_t *member; -} fenix_group_t; + std::map members; + + std::vector get_member_ids(); + //Search for id, returning {-1, nullptr} if not found. + Fenix::Data::member_iterator search_member(int id); + //As search_member, but print an error message if id not found. + Fenix::Data::member_iterator find_member(int id); + + virtual int group_delete() = 0; + virtual int member_create(fenix_member_entry_t* member) = 0; + virtual int member_delete(int memberid) = 0; + virtual int get_redundant_policy(int* name, void* value, int* flag) = 0; + virtual int member_store(int memberid, const DataSubset& subset) = 0; + virtual int member_storev(int memberid, const DataSubset& subset) = 0; + virtual int member_istore(int memberid, const DataSubset& subset, Fenix_Request* req) = 0; + virtual int member_istorev(int memberid, const DataSubset& subset, Fenix_Request* req) = 0; + virtual int commit() = 0; + virtual int snapshot_delete(int timestamp) = 0; + virtual int barrier() = 0; + virtual int member_restore(int member_id, void* target_bugger, int max, int timestamp, DataSubset& data_found) = 0; + virtual int member_lrestore(int member_id, void* target_bugger, int max, int timestamp, DataSubset& data_found) = 0; + virtual int member_restore_from_rank(int member_id, void* target_bugger, int max, int timestamp, int source_rank) = 0; + virtual int get_number_of_snapshots(int* num) = 0; + virtual int get_snapshot_at_position(int position, int* timestamp) = 0; + virtual std::vector get_snapshots() = 0; + virtual int reinit(int* flag) = 0; + virtual int member_get_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag, int sourcerank) = 0; + virtual int member_set_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag) = 0; +}; typedef struct __fenix_data_recovery { size_t count; size_t total_size; - fenix_group_t **group; + Fenix::Data::fenix_group_t **group; } fenix_data_recovery_t; typedef struct __group_entry_packet { @@ -155,18 +127,21 @@ typedef struct __group_entry_packet { fenix_data_recovery_t * __fenix_data_recovery_init(); -int __fenix_group_delete(int groupid); - -int __fenix_member_delete(int groupid, int memberid); - void __fenix_data_recovery_destroy( fenix_data_recovery_t *fx_data_recovery ); void __fenix_data_recovery_reinit( fenix_data_recovery_t *dr, fenix_two_container_packet_t 
packet); void __fenix_ensure_data_recovery_capacity( fenix_data_recovery_t *dr); -int __fenix_search_groupid( int key, fenix_data_recovery_t *dr ); +int __fenix_search_groupid( int key, fenix_data_recovery_t *dr); int __fenix_find_next_group_position( fenix_data_recovery_t *dr ); +using group_iterator = std::pair; + +group_iterator find_group(int id); +group_iterator find_group(int id, fenix_data_recovery_t *dr); + +} //end namespace Fenix::Data + #endif // FENIX_DATA_GROUP_H diff --git a/include/fenix_data_member.h b/include/fenix_data_member.hpp similarity index 77% rename from include/fenix_data_member.h rename to include/fenix_data_member.hpp index 391142b..3941060 100644 --- a/include/fenix_data_member.h +++ b/include/fenix_data_member.hpp @@ -57,48 +57,44 @@ #define __FENIX_DATA_MEMBER_H__ #include -#include "fenix_data_packet.h" -#include "fenix_util.h" +#include "fenix_data_packet.hpp" +#include "fenix_util.hpp" #define __FENIX_DEFAULT_MEMBER_SIZE 512 +namespace Fenix::Data { -typedef struct __fenix_member_entry { +struct fenix_group_t; + +struct fenix_member_entry_packet_t { int memberid; - enum states state; - void *user_data; int datatype_size; int current_count; -} fenix_member_entry_t; +}; -typedef struct __fenix_member { - size_t count; - size_t total_size; - fenix_member_entry_t *member_entry; -} fenix_member_t; +struct fenix_member_entry_t { + fenix_member_entry_t() = default; -typedef struct __member_entry_packet { - int memberid; + fenix_member_entry_packet_t to_packet(); + + int memberid = -1; + enum states state; + char *user_data = nullptr; int datatype_size; int current_count; -} fenix_member_entry_packet_t; - -fenix_member_t *__fenix_data_member_init( ); -void __fenix_data_member_destroy( fenix_member_t *member ) ; +}; -void __fenix_ensure_member_capacity( fenix_member_t *m ); -void __fenix_ensure_version_capacity_from_member( fenix_member_t *m ); - -fenix_member_entry_t* __fenix_data_member_add_entry(fenix_member_t* member, +fenix_member_entry_t* __fenix_data_member_add_entry(fenix_group_t* group, int memberid, void* data, int count, int datatype_size); int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank); int __fenix_data_member_recv_metadata(int groupid, int src_rank, fenix_member_entry_packet_t* packet); -int __fenix_search_memberid(fenix_member_t* member, int memberid); -int __fenix_find_next_member_position(fenix_member_t *m); +int __fenix_search_memberid(fenix_group_t* group, int memberid); -void __fenix_data_member_reinit(fenix_member_t *m, fenix_two_container_packet_t packet, +void __fenix_data_member_reinit(fenix_group_t *group, fenix_two_container_packet_t packet, enum states mystatus); + +} #endif // FENIX_DATA_MEMBER_H diff --git a/include/fenix_data_packet.h b/include/fenix_data_packet.hpp similarity index 100% rename from include/fenix_data_packet.h rename to include/fenix_data_packet.hpp diff --git a/include/fenix_data_policy.h b/include/fenix_data_policy.hpp similarity index 96% rename from include/fenix_data_policy.h rename to include/fenix_data_policy.hpp index b932985..18b2d23 100644 --- a/include/fenix_data_policy.h +++ b/include/fenix_data_policy.hpp @@ -59,9 +59,13 @@ #include #include "fenix.h" -#include "fenix_data_group.h" +#include "fenix_data_group.hpp" + +namespace Fenix::Data { int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, int policy_name, void* policy_value, int* flag); +} // namespace Fenix::Data + #endif //__FENIX_DATA_POLICY_H__ diff --git 
a/include/fenix_data_policy_in_memory_raid.hpp b/include/fenix_data_policy_in_memory_raid.hpp new file mode 100644 index 0000000..02759ed --- /dev/null +++ b/include/fenix_data_policy_in_memory_raid.hpp @@ -0,0 +1,236 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ +#define __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ + +#include +#include +#include +#include +#include +#include +#include "fenix_data_group.hpp" +#include "fenix_data_buffer.hpp" +#include "fenix_data_subset.hpp" + +namespace Fenix::Data::IMR { + +void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, + int timestart, int depth, void* policy_value, int* flag); + +struct Entry { + //No copying, must be moved + Entry(const Entry&) = delete; + Entry(Entry&&); + Entry& operator=(Entry&&); + + Entry(int size, int max_count); + + //Re-initializes + void reset(); + + //Get raw buffer pointer + char* data(); + //Get buffer size + int size(); + //Resize buffer + void resize(int size); + //Add subset to region and ensure buf is large enough. 
+ void add_and_fit(const DataSubset& subset); + + DataBuffer buf; + DataSubset region; + + char* partner_data(); + int partner_size(); + void partner_resize(int size); + void partner_add_and_fit(const DataSubset& subset); + + DataBuffer partner_buf; + DataSubset partner_region; + + int timestamp = -2; + int elm_size; + int elm_max_count; +}; + +struct Group; + +struct Member { + Member(fenix_member_entry_t& mentry, Group& group); + + //Returns true if snapshot was found. + bool snapshot_delete(int timestamp); + + //Member::store(v) copies local data and region. + int store(const DataSubset& subset); + //Handles partner data and region + virtual int store_impl(const DataSubset& subset) = 0; + + //As store(_impl) + int storev(const DataSubset& subset); + virtual int storev_impl(const DataSubset& subset) = 0; + + //Restore all internal snapshot data + //Moves entries to align with the group's list of timestamps. + //Impl must actually restore the entry data + int restore(); + virtual int restore_impl() = 0; + + int lrestore(char* target, int max, int timestamp, DataSubset& subset); + + void commit(int timestamp); + + fenix_member_entry_t& mentry; + Group& group; + int id = mentry.memberid; + // entries to be initialized by inheritors + std::deque entries; + + DataBuffer& send_buf; + DataBuffer& recv_buf; +}; + +struct BuddyMember : public Member { + BuddyMember(fenix_member_entry_t& mentry, Group& group); + int restore_impl() override; + int store_impl(const DataSubset& subset) override; + int storev_impl(const DataSubset& subset) override; + int exch(const DataSubset& subset, const DataSubset& partner_subset); +}; + +struct ParityMember : public Member { + ParityMember(fenix_member_entry_t& mentry, Group& group); + int restore_impl() override; + int store_impl(const DataSubset& subset) override; + + int storev_impl(const DataSubset& subset) override { + fatal_print("IMR mode 5 cannot storev"); + return 0; + }; +}; + +struct Group : public fenix_group_t { + Group(MPI_Comm comm, int timestart, int depth, int* policy, int* flag); + + int mode; + int rank_separation; + std::vector partners; + + MPI_Comm set_comm = MPI_COMM_NULL; + int set_size, set_rank; + + std::map> member_data; + std::deque timestamps; + + DataBuffer send_buf, recv_buf; + + void sync_timestamps(); + void build_set_comm(); + + //nullptr if member not found + Member* find_member(int member_id); + + std::string str(); + + int group_delete() override; + int member_create(fenix_member_entry_t* mentry) override; + int member_delete(int member_id) override; + int get_redundant_policy(int* name, void* value, int* flag) override; + + int member_store(int member_id, const DataSubset& subset) override; + int member_storev(int member_id, const DataSubset& subset) override; + int member_istore( + int member_id, const DataSubset& subset, Fenix_Request *request + ) override; + int member_istorev( + int member_id, const DataSubset& subset, Fenix_Request *request + ) override; + + int commit() override; + + int snapshot_delete(int timestamp) override; + int barrier() override; + + int member_restore( + int member_id, void* buffer, int max, int timestamp, DataSubset& data_found + ) override; + int member_lrestore( + int member_id, void* buffer, int max, int timestamp, DataSubset& data_found + ) override; + int member_restore_from_rank( + int member_id, void* buffer, int max, int timestamp, int source_rank + ) override; + + int member_get_attribute( + fenix_member_entry_t* member, int name, void* value, int* flag, + int sourcerank + ) override; +
int member_set_attribute( + fenix_member_entry_t* member, int name, void* value, int* flag + ) override; + + int get_number_of_snapshots(int* number_of_snapshots) override; + int get_snapshot_at_position(int position, int* timestamp) override; + std::vector get_snapshots(); + + int reinit(int* flag) override; +}; + +} + +#endif //__FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ diff --git a/include/fenix_data_recovery.h b/include/fenix_data_recovery.hpp similarity index 78% rename from include/fenix_data_recovery.h rename to include/fenix_data_recovery.hpp index 4580cb9..b27efac 100644 --- a/include/fenix_data_recovery.h +++ b/include/fenix_data_recovery.hpp @@ -58,10 +58,10 @@ #define __FENIX_DATA_RECOVERY__ -#include "fenix_data_group.h" -#include "fenix_data_member.h" +#include "fenix_data_group.hpp" +#include "fenix_data_member.hpp" #include "fenix_data_subset.h" -#include "fenix_util.h" +#include "fenix_util.hpp" #include #include #include @@ -74,11 +74,6 @@ #define __NUM_MEMBER_ATTR_SIZE 3 #define __GRP_MEMBER_LENTRY_ATTR_SIZE 11 - - - - - #define STORE_RANK_TAG 2000 #define STORE_COUNT_TAG 2001 #define STORE_SIZE_TAG 2002 @@ -95,10 +90,7 @@ #define RECOVER_DATA_TAG 1907 - - - - +namespace Fenix::Data { typedef struct __data_entry_packet { int count; @@ -106,20 +98,10 @@ typedef struct __data_entry_packet { } fenix_data_entry_packet_t; -int __fenix_group_create(int, MPI_Comm, int, int, int, void*, int*); int __fenix_group_get_redundancy_policy(int, int*, int*, int*); -int __fenix_member_create(int, int, void *, int, int); int __fenix_data_wait(Fenix_Request); int __fenix_data_test(Fenix_Request, int *); -int __fenix_member_store(int, int, Fenix_Data_subset); -int __fenix_member_storev(int, int, Fenix_Data_subset); -int __fenix_member_istore(int, int, Fenix_Data_subset, Fenix_Request *); -int __fenix_member_istorev(int, int, Fenix_Data_subset, Fenix_Request *); -int __fenix_data_commit(int, int *); -int __fenix_data_commit_barrier(int, int *); int __fenix_data_barrier(int); -int __fenix_member_restore(int, int, void *, int, int, Fenix_Data_subset*); -int __fenix_member_lrestore(int, int, void *, int, int, Fenix_Data_subset*); int __fenix_member_restore_from_rank(int, int, void *, int, int, int); int __fenix_get_number_of_members(int, int *); int __fenix_get_member_at_position(int, int *, int); @@ -127,19 +109,7 @@ int __fenix_get_number_of_snapshots(int, int *); int __fenix_get_snapshot_at_position(int, int, int *); int __fenix_member_get_attribute(int, int, int, void *, int *, int); int __fenix_member_set_attribute(int, int, int, void *, int *); -int __fenix_snapshot_delete(int groupid, int timestamp); - -int __fenix_group_delete(int); -int __fenix_member_delete(int, int); - -void __fenix_init_data_recovery(); -void __fenix_init_partner_copy_recovery(); - -void __fenix_dr_print_store(); -void __fenix_dr_print_restore(); -void __fenix_dr_print_datastructure(); -void __fenix_store_single(); -void __fenix_store_all(); +} #endif diff --git a/include/fenix_data_subset.h b/include/fenix_data_subset.h index abb2587..0f08602 100644 --- a/include/fenix_data_subset.h +++ b/include/fenix_data_subset.h @@ -57,45 +57,34 @@ #define __FENIX_DATA_SUBSET_H__ #include -#define __FENIX_SUBSET_EMPTY 1 -#define __FENIX_SUBSET_FULL 2 -#define __FENIX_SUBSET_CREATE 3 -#define __FENIX_SUBSET_CREATEV 4 -#define __FENIX_SUBSET_UNDEFINED -1 - - -//Specifier speeds up the process by letting us know if this is a simple -//subset in which each region is repeated w/ same stride, or if each -//region is never repeated (EG 
create vs createv). Also has specifiers for -//FULL/EMPTY. -typedef struct { - int num_blocks; - int* start_offsets; - int* end_offsets; - int* num_repeats; - int stride; - int specifier; -} Fenix_Data_subset; +#include "fenix.h" int __fenix_data_subset_init(int num_blocks, Fenix_Data_subset* subset); +int __fenix_data_subset_init_empty(Fenix_Data_subset* subset); int __fenix_data_subset_create(int, int, int, int, Fenix_Data_subset *); int __fenix_data_subset_createv(int, int *, int *, Fenix_Data_subset *); -void __fenix_data_subset_deep_copy(Fenix_Data_subset* from, Fenix_Data_subset* to); -void __fenix_data_subset_merge(Fenix_Data_subset* first_subset, - Fenix_Data_subset* second_subset, Fenix_Data_subset* output); +void __fenix_data_subset_deep_copy(const Fenix_Data_subset* from, Fenix_Data_subset* to); +void __fenix_data_subset_merge(const Fenix_Data_subset* first_subset, + const Fenix_Data_subset* second_subset, Fenix_Data_subset* output); void __fenix_data_subset_merge_inplace(Fenix_Data_subset* first_subset, - Fenix_Data_subset* second_subset); -void __fenix_data_subset_copy_data(Fenix_Data_subset* ss, void* dest, + const Fenix_Data_subset* second_subset); +void __fenix_data_subset_copy_data(const Fenix_Data_subset* ss, void* dest, void* src, size_t data_type_size, size_t max_size); -int __fenix_data_subset_data_size(Fenix_Data_subset* ss, size_t max_size); -void* __fenix_data_subset_serialize(Fenix_Data_subset* ss, void* src, - size_t type_size, size_t max_size, size_t* output_size); -void __fenix_data_subset_deserialize(Fenix_Data_subset* ss, void* src, +int __fenix_data_subset_storage_size(const Fenix_Data_subset* ss, size_t max_size); +void __fenix_data_subset_serialize(const Fenix_Data_subset* ss, void* src, + void* dest, size_t type_size, size_t max_size, size_t output_size); +void __fenix_data_subset_deserialize(const Fenix_Data_subset* ss, void* src, void* dest, size_t max_size, size_t type_size); -void __fenix_data_subset_send(Fenix_Data_subset* ss, int dest, int tag, MPI_Comm comm); +void __fenix_data_subset_send(const Fenix_Data_subset* ss, int dest, int tag, MPI_Comm comm); void __fenix_data_subset_recv(Fenix_Data_subset* ss, int src, int tag, MPI_Comm comm); -int __fenix_data_subset_is_full(Fenix_Data_subset* ss, size_t data_length); +int __fenix_data_subset_is_full(const Fenix_Data_subset* ss, size_t data_length); int __fenix_data_subset_free(Fenix_Data_subset *); int __fenix_data_subset_delete(Fenix_Data_subset *); +size_t __fenix_data_subset_count(const Fenix_Data_subset* ss, size_t max_idx); +inline size_t __fenix_data_subset_data_size( + const Fenix_Data_subset* ss, size_t max_size +){ + return __fenix_data_subset_count(ss, max_size-1); +} #endif // FENIX_DATA_SUBSET_H diff --git a/include/fenix_data_subset.hpp b/include/fenix_data_subset.hpp new file mode 100644 index 0000000..fba9f68 --- /dev/null +++ b/include/fenix_data_subset.hpp @@ -0,0 +1,227 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef __FENIX_DATA_SUBSET_HPP__ +#define __FENIX_DATA_SUBSET_HPP__ + +#include "fenix.h" +#include "fenix_opt.hpp" +#include "fenix_data_buffer.hpp" + +#include +#include +#include +#include +#include + +namespace Fenix { + +namespace Detail { + +struct DataRegionIterator; + +struct DataRegion { + static constexpr size_t MAX = std::numeric_limits::max(); + + DataRegion(std::pair b) + : DataRegion(b, 0, MAX) { }; + DataRegion(std::pair b, size_t m_reps, size_t m_stride) + : start(b.first), + end((b.second!=MAX && b.second+1==b.first+m_stride) ? + b.second+m_reps*m_stride : b.second), + reps((b.second==MAX || b.second+1==b.first+m_stride) ? 0 : m_reps), + stride(reps == 0 ? MAX : m_stride) + { + fenix_assert(start <= end); + fenix_assert(stride != MAX || reps == 0); + fenix_assert(reps == 0 || start+stride > end); + }; + + //Overall range of this region. + std::pair range() const; + + //Count of elements contained in this region + size_t count() const; + + bool operator==(const DataRegion& other) const; + bool operator!=(const DataRegion& other) const; + + //Order based on start + bool operator<(const DataRegion& other) const; + + //Return true if these regions intersect + bool operator&&(const DataRegion& other) const; + + //Returns intersection of two regions. Not defined when both regions are strided. 
+ std::set operator&(const DataRegion& other) const; + + //This region w/o the overlap with other + std::set operator-(const DataRegion& other) const; + + //Get a region that is a single repetition of this one's, with bounds check + DataRegion get_rep(size_t n) const; + //As above, but multiple repetitions + DataRegion get_reps(size_t first, size_t last) const; + + //For a strided region, returns region between repetitions + //Undefined for an unstrided region. + DataRegion inverted() const; + + std::optional try_merge(const DataRegion& other) const; + + std::string str() const; + + //Inclusive region bounds. + size_t start, end; + + //Number of times to repeat this after the first + size_t reps; + + //Distance between starts of each repetition + size_t stride; +}; +} // namespace Detail + +struct DataSubset { + static constexpr size_t MAX = Detail::DataRegion::MAX; + + //DataSubset(const DataSubset&) = default; + //DataSubset(DataSubset&&) = default; + //Empty + DataSubset() = default; + //[0, end] + explicit DataSubset(size_t end); + //[bounds.first, bounds.second] + DataSubset(std::pair bounds); + //[b.first, b.second], ..., [b.first+stride*(n-1), b.second+stride*(n-1)] + DataSubset(std::pair b, size_t n, size_t stride); + //[bounds[0].first, bounds[0].second], ... + DataSubset(std::vector> bounds); + //Merge two subsets + DataSubset(const DataSubset& a, const DataSubset& b); + //Create from serialized subset object + DataSubset(const DataBuffer& buf); + + DataSubset operator+(const DataSubset& other) const; + DataSubset& operator+=(const DataSubset& other); + + DataSubset operator+(const Fenix_Data_subset& other) const; + DataSubset& operator+=(const Fenix_Data_subset& other); + + DataSubset operator-(const DataSubset& other) const; + bool operator==(const DataSubset& other) const; + bool operator!=(const DataSubset& other) const; + + bool empty() const; + + //Overall range of this subset + std::pair range() const; + //Equivalent to range().first and range().second, possibly more performant + size_t start() const; + size_t end() const; + + //Count of elements in this subset from [0, max_index] + //Returns 0 if max_index==end()==MAX + size_t count(size_t max_index) const; + + //Count of elements in this subset if it were full [0, end()] + //Returns 0 if end()==MAX + size_t max_count() const; + + //Serialize this subset object into buf + //Will resize buf to fit exactly. 
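A minimal usage sketch of the DataSubset constructors and merge operator declared above, assuming the header path shown and a C++17 (or later) compiler; the indices are placeholders:

    #include "fenix_data_subset.hpp"
    using Fenix::DataSubset;

    void data_subset_sketch() {
      DataSubset full(99);                                // [0, 99]
      DataSubset window({10, 19});                        // [10, 19]
      DataSubset strided({0, 4}, /*n=*/3, /*stride=*/25); // [0,4], [25,29], [50,54]

      DataSubset merged = window + strided;  // union of the two subsets
      bool covered = merged.includes(27);    // true: 27 lies inside [25, 29]
      size_t n     = merged.count(99);       // elements covered within [0, 99]
      (void)covered; (void)n; (void)full;
    }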
+ void serialize(DataBuffer& buf) const; + + //Will reset dst to fit + void serialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst + ) const; + //If dst.size()==0, will resize dst to fit + void deserialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst + ) const; + + //If src_len == 0, will assume src is a large as needed + //Will resize dst if too small + void copy_data( + const size_t elm_size, const size_t src_len, const char* src, DataBuffer& dst + ) const; + //If dst_len == 0, will assume dst is as large as needed + void copy_data( + const size_t elm_size, const DataBuffer& src, const size_t dst_len, + char* dst + ) const; + + //Whether this subset includes the element at index idx + bool includes(size_t idx) const; + //Whether this subset includes the entire range [0, end] without gaps + bool includes_all(size_t end) const; + + //Return equivalent of regions & [0, max_index] + std::set bounded_regions(size_t max_index) const; + //As above, but regions & [start, end] + std::set bounded_regions(size_t start, size_t end) const; + + std::string str() const; + + //Individual data regions in this subset + std::set regions; + + private: + //merge immediately adjacent regions to simplify + void merge_regions(); +}; +} // namespace Fenix +#endif // __FENIX_DATA_SUBSET_HPP_ diff --git a/include/fenix_data_policy_in_memory_raid.h b/include/fenix_exception.hpp similarity index 86% rename from include/fenix_data_policy_in_memory_raid.h rename to include/fenix_exception.hpp index 931b0c7..dfe8d10 100644 --- a/include/fenix_data_policy_in_memory_raid.h +++ b/include/fenix_exception.hpp @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Michael Heroux, and Matthew Whitlock +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -54,13 +54,21 @@ //@HEADER */ -#ifndef __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ -#define __FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ +#ifndef FENIX_EXCEPTION_HPP +#define FENIX_EXCEPTION_HPP #include -#include "fenix_data_group.h" +#include -void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, - int timestart, int depth, void* policy_value, int* flag); +namespace Fenix { -#endif //__FENIX_DATA_POLICY_IN_MEMORY_RAID_H__ +struct CommException : public std::exception { + MPI_Comm repaired_comm; + const int fenix_err; + CommException(MPI_Comm comm, int err) : + repaired_comm(comm), fenix_err(err) { }; +}; + +} + +#endif diff --git a/include/fenix_ext.h b/include/fenix_ext.hpp similarity index 55% rename from include/fenix_ext.h rename to include/fenix_ext.hpp index ef4dcc4..6066e01 100644 --- a/include/fenix_ext.h +++ b/include/fenix_ext.hpp @@ -58,60 +58,61 @@ #define __FENIX_EXT_H__ #include +#include #include "fenix.h" -#include "fenix_opt.h" -#include "fenix_data_group.h" -#include "fenix_process_recovery.h" +#include "fenix.hpp" +#include "fenix_opt.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_data_group.hpp" + +namespace Fenix { typedef struct { - int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init - int num_survivor_ranks; // Keeps the global information on the number of survived MPI ranks after failure - int num_recovered_ranks; // Keeps the number of spare ranks brought into MPI communicator recovery - int resume_mode; // Defines how program resumes after process recovery - int spawn_policy; // Indicate dynamic process spawning - int spare_ranks; // Spare ranks entered by user to repair failed ranks - int repair_result; // Internal global variable to store the result of MPI communicator repair - int finalized; + int num_inital_ranks; // Keeps the global MPI rank ID at Fenix_init + int num_survivor_ranks = 0; // Keeps the global information on the number of survived MPI ranks after failure + int num_recovered_ranks = 0; // Keeps the number of spare ranks brought into MPI communicator recovery + int spare_ranks; // Spare ranks entered by user to repair failed ranks + + ResumeMode resume_mode = JUMP; + CallbackExceptionMode callback_exception_mode = RETHROW; + UnhandledMode unhandled_mode = ABORT; + int ignore_errs = false; // Temporarily ignore all errors & recovery + int spawn_policy; // Indicate dynamic process spawning jmp_buf *recover_environment; // Calling environment to fill the jmp_buf structure + int mpi_fail_code = MPI_SUCCESS; + int repair_result = FENIX_SUCCESS; // Internal variable to store the result of MPI comm repair + int role = FENIX_ROLE_INITIAL_RANK; - //enum FenixRankRole role; // Role of rank: initial, survivor or repair - int role; // Role of rank: initial, survivor or repair - int fenix_init_flag; + int fenix_init_flag = false; + int finalized = false; - int fail_world_size; - int* fail_world; + int fail_world_size = 0; + int* fail_world = nullptr; //Save the pointer to role and error of Fenix_Init - int *ret_role; - int *ret_error; + int *ret_role = nullptr; + int *ret_error = nullptr; - fenix_callback_list_t* callback_list; // singly linked list for user-defined Fenix callback functions - //fenix_communicator_list_t* communicator_list; // singly linked list for Fenix resilient communicators - fenix_debug_opt_t options; // This is reserved to store the user options + std::vector callbacks; + fenix_debug_opt_t 
options; // This is reserved to store the user options - MPI_Comm *world; // Duplicate of the MPI communicator provided by user - MPI_Comm new_world; // Global MPI communicator identical to g_world but without spare ranks - MPI_Comm *user_world; // MPI communicator with repaired ranks - //Manage state of the comms. Necessary when failures happen rapidly, mussing up state - int new_world_exists, user_world_exists; - + MPI_Comm *world; // Duplicate of comm provided by user + MPI_Comm *user_world; // User-facing comm with repaired ranks and no spares + MPI_Comm new_world; // Internal duplicate of user_world + int new_world_exists = false, user_world_exists = false; + + //Values used for Fenix_Process_detect_failures int dummy_recv_buffer; MPI_Request check_failures_req; - - MPI_Op agree_op; // This is reserved for the global agreement call for Fenix data recovery API - - - MPI_Errhandler mpi_errhandler; // This stores callback info for our custom error handler - int ignore_errs; // Set this to return errors instead of using the error handler normally. (Don't forget to unset!) - int print_unhandled; // Set this to print the error string for MPI errors of an unhandled return type. - - + MPI_Op agree_op; // Global agreement call for Fenix data recovery API + MPI_Errhandler mpi_errhandler; // Our custom error handler - fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure + Fenix::Data::fenix_data_recovery_t *data_recovery; // Global pointer for Fenix Data Recovery Data Structure } fenix_t; -extern fenix_t fenix; -#endif // __FENIX_EXT_H__ +} +extern Fenix::fenix_t fenix; +#endif // __FENIX_EXT_H__ diff --git a/include/fenix_f.h b/include/fenix_f.h deleted file mode 100644 index a8f06c0..0000000 --- a/include/fenix_f.h +++ /dev/null @@ -1,82 +0,0 @@ -!/* -!//@HEADER -!// ************************************************************************ -!// -!// -!// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -!// _| _| _|_| _| _| _| _| -!// _|_|_| _|_|_| _| _| _| _| _| -!// _| _| _| _|_| _| _| _| -!// _| _|_|_|_| _| _| _|_|_| _| _| -!// -!// -!// -!// -!// Copyright (C) 2016 Rutgers University and Sandia Corporation -!// -!// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -!// the U.S. Government retains certain rights in this software. -!// -!// Redistribution and use in source and binary forms, with or without -!// modification, are permitted provided that the following conditions are -!// met: -!// -!// 1. Redistributions of source code must retain the above copyright -!// notice, this list of conditions and the following disclaimer. -!// -!// 2. Redistributions in binary form must reproduce the above copyright -!// notice, this list of conditions and the following disclaimer in the -!// documentation and/or other materials provided with the distribution. -!// -!// 3. Neither the name of the Corporation nor the names of the -!// contributors may be used to endorse or promote products derived from -!// this software without specific prior written permission. -!// -!// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -!// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -!// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -!// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -!// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -!// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -!// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -!// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -!// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -!// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -!// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!// -!// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -!// Michael Heroux, and Matthew Whitlock -!// -!// Questions? Contact Keita Teranishi (knteran@sandia.gov) and -!// Marc Gamell (mgamell@cac.rutgers.edu) -!// -!// ************************************************************************ -!//@HEADER -!*/ - -#ifndef __FENIX_F_H__ -#define __FENIX_F_H__ - - -!/* ------------------------------------------------------------------------- */ -!/* */ -!/* */ -!/* D E F I N E S */ -!/* */ -!/* */ -!/* ------------------------------------------------------------------------- */ - -#if 1 -#define Fenix_Init(_process_role, _comm, _new_comm, _argc, _argv, _num_of_spare_ranks, _spawn, _info, _error) \ - jb \ - call Fenix_Preinit(_process_role, _comm, _new_comm, _argc, _argv, _spare_ranks, _spawn, _info, _error); \ - if(setjmp(jb) .ne. 0) then; \ - _process_status = 2; \ - endif; \ - call Fenix_Postinit( error ); - -#endif - -#endif - - diff --git a/include/fenix_process_recovery_global.h b/include/fenix_init.h similarity index 84% rename from include/fenix_process_recovery_global.h rename to include/fenix_init.h index 4116369..19471d1 100644 --- a/include/fenix_process_recovery_global.h +++ b/include/fenix_init.h @@ -44,7 +44,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, // Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and @@ -53,23 +53,24 @@ // ************************************************************************ //@HEADER */ -#ifndef __FENIX_PROCES_RECOVERY_GLOBAL_H__ -#define __FENIX_PROCES_RECOVERY_GLOBAL_H__ + +#ifndef __FENIX_INIT__ +#define __FENIX_INIT__ #include #include -#include -#include -#include -#include -#include -#include -#include "fenix_opt.h" -#include "fenix_util.h" -#include "fenix_data_group.h" +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +int __fenix_preinit(int *, MPI_Comm, MPI_Comm *, int *, char ***, int, int, MPI_Info, int *, jmp_buf *); + +void __fenix_postinit(); -/* This header file is intended to provide global variable definitions for fenix_process_recovery.c only */ +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif -#endif // __FENIX_PROCES_RECOVERY_GLOBAL_H__ +#endif diff --git a/include/fenix_opt.hpp b/include/fenix_opt.hpp new file mode 100644 index 0000000..be73adb --- /dev/null +++ b/include/fenix_opt.hpp @@ -0,0 +1,136 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef __FENIX_OPT__ +#define __FENIX_OPT__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// FENIX_ABORT kills whole MPI job if MPI visible in current file, else just +// aborts this process +// Prefer fenix_assert or fatal_print instead of using this directly +#ifdef MPI_VERSION + #define FENIX_ABORT() \ + do { \ + int mpi_is_init; \ + MPI_Initialized(&mpi_is_init); \ + if(mpi_is_init) MPI_Abort(MPI_COMM_WORLD, 1); \ + abort(); \ + } while(0) +#else + #define FENIX_ABORT() abort() +#endif + +// Helpers needing to support printing w/o any user-supplied format args +// Supports up to 10 args +// Functions should be named base_name_s for 1 args or base_name_a otherwise +#define FN_SUFF_I(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, NAME, ...) NAME +#define FN_SUFF(...) FN_SUFF_I(__VA_ARGS__,_a,_a,_a,_a,_a,_a,_a,_a,_a,_s) +#define FN_SUFF_MERGE_IMPL(fn, suff) fn ## suff +#define FN_SUFF_MERGE(fn, suff) FN_SUFF_MERGE_IMPL(fn, suff) +#define FN_NAME(base_name, ...) FN_SUFF_MERGE(base_name, FN_SUFF(__VA_ARGS__)) + +#define TRACE_PRINT_FMT "%s:%d %s(): " +#define TRACE_PRINT_ARG __FILE__, __LINE__, __func__ + +#define traced_print_s(file, fmt) \ + fprintf(file, TRACE_PRINT_FMT fmt "\n", TRACE_PRINT_ARG) +#define traced_print_a(file, fmt, ...) \ + fprintf(file, TRACE_PRINT_FMT fmt "\n", TRACE_PRINT_ARG, __VA_ARGS__) +#define traced_print(file, ...) FN_NAME(traced_print, __VA_ARGS__)(file, __VA_ARGS__) + +#define debug_print(...) traced_print(stderr, __VA_ARGS__) +#define verbose_print(...) traced_print(stdout, __VA_ARGS__) + +//Multi-line macro functions wrapped in do-while to maintain correct behavior +//regardless of what surrouding code is +#define fatal_print(...) \ + do { \ + traced_print(stderr, __VA_ARGS__); \ + traced_print(stderr, "Fenix aborting due to fatal error!"); \ + FENIX_ABORT(); \ + } while(0) + +#define fenix_assert_a(predicate, ...) \ + do{if( !(predicate) ){ fatal_print(__VA_ARGS__); }} while(0) +#define fenix_assert_s(predicate) \ + fenix_assert_a(predicate, "internal error, failed assertion (" #predicate ")" ); + +#ifdef NDEBUG + //Disable assertions when NDEBUG + #define fenix_assert(...) do { } while(0) +#else + #define fenix_assert(...) 
FN_NAME(fenix_assert, __VA_ARGS__)(__VA_ARGS__) +#endif + +typedef struct __fenix_debug_opt_t { + int verbose = -1; +} fenix_debug_opt_t; + +void __fenix_init_opt(int argc, char **argv); + +#endif diff --git a/include/fenix_process_recovery.h b/include/fenix_process_recovery.hpp similarity index 82% rename from include/fenix_process_recovery.h rename to include/fenix_process_recovery.hpp index 9b85e04..c2f26b3 100644 --- a/include/fenix_process_recovery.h +++ b/include/fenix_process_recovery.hpp @@ -57,6 +57,7 @@ #ifndef __FENIX_PROCESS_RECOVERY__ #define __FENIX_PROCESS_RECOVERY__ +#include #include #include #include @@ -65,21 +66,12 @@ #include #include #include +#include -#define __FENIX_RESUME_AT_INIT 0 -#define __FENIX_RESUME_NO_JUMP 200 +#include "fenix_init.h" +#include -typedef void (*recover)( MPI_Comm, int, void *); - -typedef struct fcouple { - recover x; - void *y; -} fenix_callback_func; - -typedef struct __fenix_callback_list { - fenix_callback_func *callback; - struct __fenix_callback_list *next; -} fenix_callback_list_t; +using fenix_callback_func = std::function; typedef struct __fenix_comm_list_elm { struct __fenix_comm_list_elm *next; @@ -92,21 +84,21 @@ typedef struct { fenix_comm_list_elm_t *tail; } fenix_comm_list_t; -int __fenix_preinit(int *, MPI_Comm, MPI_Comm *, int *, char ***, int, int, MPI_Info, int *, jmp_buf *); +Fenix_Resume_mode get_resume_mode(const std::string_view& name); + +Fenix_Unhandled_mode get_unhandled_mode(const std::string_view& name); + +int fenix_preinit(const Fenix::Args::FenixInitArgs& args, jmp_buf* jump_env = nullptr); int __fenix_create_new_world(); int __fenix_repair_ranks(); -int __fenix_callback_register(void (*recover)(MPI_Comm, int, void *), void *); +int __fenix_callback_register(fenix_callback_func& recover); int __fenix_callback_pop(); -void __fenix_callback_push(fenix_callback_list_t **, fenix_callback_func *); - -void __fenix_callback_invoke_all(int error); - -int __fenix_callback_destroy(fenix_callback_list_t *callback_list); +void __fenix_callback_invoke_all(); int* __fenix_get_fail_ranks(int *, int, int); @@ -116,8 +108,6 @@ int __fenix_get_rank_role(); void __fenix_set_rank_role(int FenixRankRole); -void __fenix_postinit(int *); - int __fenix_detect_failures(int do_recovery); void __fenix_finalize(); diff --git a/include/fenix_util.h b/include/fenix_util.hpp similarity index 99% rename from include/fenix_util.h rename to include/fenix_util.hpp index 8f76275..a42d4c3 100644 --- a/include/fenix_util.h +++ b/include/fenix_util.hpp @@ -57,7 +57,7 @@ #ifndef __FENIX_UTIL__ #define __FENIX_UTIL__ -#include "fenix_process_recovery.h" +#include "fenix_process_recovery.hpp" #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7c823fd..b48defd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,35 +12,35 @@ configure_file (${CMAKE_SOURCE_DIR}/include/fenix-config.h.in "${CMAKE_CURRENT_BINARY_DIR}/fenix-config.h" @ONLY) #include_directories(${CMAKE_CURRENT_BINARY_DIR}) -FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h) +FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h*) set (Fenix_SOURCES -fenix.c -fenix_mpi_override.c -fenix_opt.c -fenix_process_recovery.c -fenix_util.c -fenix_data_recovery.c -fenix_data_group.c -fenix_data_policy.c -fenix_data_policy_in_memory_raid.c -fenix_data_member.c -fenix_data_subset.c -fenix_comm_list.c -fenix_callbacks.c -globals.c + fenix.cpp + fenix_opt.cpp + fenix_process_recovery.cpp + fenix_util.cpp + fenix_data_recovery.cpp + 
fenix_data_group.cpp + fenix_data_policy.cpp + fenix_data_policy_in_memory_raid.cpp + fenix_data_member.cpp + fenix_data_subset.cpp + fenix_callbacks.cpp + fenix_ext.cpp ) -add_library( fenix STATIC ${Fenix_SOURCES}) +add_library(fenix STATIC ${Fenix_SOURCES}) -target_link_libraries(fenix PUBLIC MPI::MPI_C) +target_compile_features(fenix PRIVATE cxx_std_17) -target_include_directories(fenix - PUBLIC +target_link_libraries(fenix PUBLIC MPI::MPI_CXX) + +target_include_directories(fenix + PUBLIC $ $ $ - PRIVATE "${CMAKE_SOURCE_DIR}/src") + PRIVATE "${CMAKE_SOURCE_DIR}/src") install(TARGETS fenix EXPORT fenix diff --git a/src/fenix.c b/src/fenix.cpp similarity index 61% rename from src/fenix.c rename to src/fenix.cpp index 056814a..1bbc058 100644 --- a/src/fenix.c +++ b/src/fenix.cpp @@ -54,23 +54,44 @@ //@HEADER */ -#include "fenix_data_recovery.h" -#include "fenix_process_recovery.h" -#include "fenix_util.h" -#include "fenix_ext.h" -#include "fenix.h" +#include "fenix_data_recovery.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_util.hpp" +#include "fenix_ext.hpp" +#include "fenix.hpp" +#include "fenix_data_subset.hpp" -const Fenix_Data_subset FENIX_DATA_SUBSET_FULL = {0, NULL, NULL, NULL, 0, __FENIX_SUBSET_FULL}; -const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY = {0, NULL, NULL, NULL, 0, __FENIX_SUBSET_EMPTY}; +using namespace Fenix; +using namespace Fenix::Data; + +namespace Fenix::Data { +const DataSubset SUBSET_FULL = {{0, Fenix::DataSubset::MAX}}; +const DataSubset SUBSET_EMPTY = {}; +DataSubset SUBSET_IGNORE = SUBSET_EMPTY; +} + +const Fenix_Data_subset FENIX_DATA_SUBSET_FULL = { new DataSubset(DataSubset::MAX) }; +const Fenix_Data_subset FENIX_DATA_SUBSET_EMPTY = { new DataSubset() }; +Fenix_Data_subset* FENIX_DATA_SUBSET_IGNORE = NULL; + +int Fenix_Callback_register(std::function callback){ + return __fenix_callback_register(callback); +} int Fenix_Callback_register(void (*recover)(MPI_Comm, int, void *), void *callback_data) { - return __fenix_callback_register(recover, callback_data); + return Fenix_Callback_register([recover, callback_data](MPI_Comm comm, int fenix_error){ + recover(comm, fenix_error, callback_data); + }); } int Fenix_Callback_pop() { return __fenix_callback_pop(); } +void Fenix_Callback_invoke_all() { + __fenix_callback_invoke_all(); +} + int Fenix_Initialized(int *flag) { *flag = (fenix.fenix_init_flag) ? 
1 : 0; return FENIX_SUCCESS; @@ -81,17 +102,21 @@ int Fenix_Finalize() { return FENIX_SUCCESS; } -int Fenix_Data_group_create( int group_id, MPI_Comm comm, int start_time_stamp, int depth, int policy_name, - void* policy_value, int* flag) { - return __fenix_group_create(group_id, comm, start_time_stamp, depth, policy_name, policy_value, flag); +int Fenix_Data_group_create( + int group_id, MPI_Comm comm, int start_time_stamp, int depth, int policy, + void* policy_args, int* flag +) { + return group_create( + group_id, comm, start_time_stamp, depth, policy, policy_args, flag + ); } int Fenix_Data_member_create( int group_id, int member_id, void *buffer, int count, MPI_Datatype datatype ) { - return __fenix_member_create(group_id, member_id, buffer, count, __fenix_get_size(datatype)); + return member_create(group_id, member_id, buffer, count, datatype); } int Fenix_Data_group_get_redundancy_policy( int group_id, int* policy_name, void *policy_value, int *flag ) { - return __fenix_group_get_redundancy_policy( group_id, policy_name, policy_value, flag ); + return __fenix_group_get_redundancy_policy( group_id, policy_name, (int*)policy_value, flag ); } int Fenix_Data_wait(Fenix_Request request) { @@ -102,28 +127,28 @@ int Fenix_Data_test(Fenix_Request request, int *flag) { return __fenix_data_test(request, flag); } -int Fenix_Data_member_store(int group_id, int member_id, Fenix_Data_subset subset_specifier) { - return __fenix_member_store(group_id, member_id, subset_specifier); +int Fenix_Data_member_store(int group_id, int member_id, const Fenix_Data_subset subset) { + return member_store(group_id, member_id, *(DataSubset*)subset.impl); } -int Fenix_Data_member_storev(int group_id, int member_id, Fenix_Data_subset subset_specifier) { - return 0; +int Fenix_Data_member_storev(int group_id, int member_id, const Fenix_Data_subset subset) { + return member_storev(group_id, member_id, *(DataSubset*)subset.impl); } -int Fenix_Data_member_istore(int group_id, int member_id, Fenix_Data_subset subset_specifier, Fenix_Request *request) { - return 0; +int Fenix_Data_member_istore(int group_id, int member_id, const Fenix_Data_subset subset, Fenix_Request *request) { + return member_istore(group_id, member_id, *(DataSubset*)subset.impl, request); } -int Fenix_Data_member_istorev(int group_id, int member_id, Fenix_Data_subset subset_specifier, Fenix_Request *request) { - return 0; +int Fenix_Data_member_istorev(int group_id, int member_id, const Fenix_Data_subset subset, Fenix_Request *request) { + return member_istorev(group_id, member_id, *(DataSubset*)subset.impl, request); } int Fenix_Data_commit(int group_id, int *time_stamp) { - return __fenix_data_commit(group_id, time_stamp); + return commit(group_id, time_stamp); } int Fenix_Data_commit_barrier(int group_id, int *time_stamp) { - return __fenix_data_commit_barrier(group_id, time_stamp); + return commit_barrier(group_id, time_stamp); } int Fenix_Data_barrier(int group_id) { @@ -131,11 +156,29 @@ int Fenix_Data_barrier(int group_id) { } int Fenix_Data_member_restore(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found) { - return __fenix_member_restore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); + DataSubset* s = new DataSubset(); + int ret = member_restore( + group_id, member_id, target_buffer, max_count, time_stamp, *s + ); + if(data_found == nullptr){ + delete s; + } else { + data_found->impl = s; + } + return ret; } int Fenix_Data_member_lrestore(int group_id, int 
member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found) { - return __fenix_member_lrestore(group_id, member_id, target_buffer, max_count, time_stamp, data_found); + DataSubset* s = new DataSubset(); + int ret = member_lrestore( + group_id, member_id, target_buffer, max_count, time_stamp, *s + ); + if(data_found == nullptr){ + delete s; + } else { + data_found->impl = s; + } + return ret; } int Fenix_Data_member_restore_from_rank(int group_id, int member_id, void *target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found, int source_rank) { @@ -155,11 +198,11 @@ int Fenix_Data_subset_delete(Fenix_Data_subset *subset_specifier) { } int Fenix_Data_group_get_number_of_members(int group_id, int *number_of_members) { - return 0; + return __fenix_get_number_of_members(group_id, number_of_members); } int Fenix_Data_group_get_member_at_position(int group_id, int *member_id, int position) { - return 0; + return __fenix_get_member_at_position(group_id, member_id, position); } int Fenix_Data_group_get_number_of_snapshots(int group_id, int *number_of_snapshots) { @@ -179,15 +222,15 @@ int Fenix_Data_member_attr_set(int group_id, int member_id, int attribute_name, } int Fenix_Data_snapshot_delete(int group_id, int time_stamp) { - return __fenix_snapshot_delete(group_id, time_stamp); + return snapshot_delete(group_id, time_stamp); } int Fenix_Data_group_delete(int group_id) { - return __fenix_group_delete(group_id); + return group_delete(group_id); } int Fenix_Data_member_delete(int group_id, int member_id) { - return __fenix_member_delete(group_id, member_id); + return member_delete(group_id, member_id); } int Fenix_Process_fail_list(int** fail_list){ @@ -213,3 +256,67 @@ int Fenix_check_cancelled(MPI_Request *request, MPI_Status *status){ int Fenix_Process_detect_failures(int do_recovery){ return __fenix_detect_failures(do_recovery); } + +Fenix_Rank_role Fenix_get_role(){ + return role(); +} + +int Fenix_get_error(){ + return error(); +} + +int Fenix_get_nspare(){ + return nspare(); +} + +namespace Fenix { + +void init(const Args::FenixInitArgs args){ + fenix_assert(args.resume_mode != JUMP, "Must use Fenix_Init to use the JUMP resume mode"); + + fenix_preinit(args); + __fenix_postinit(); +} + +void throw_exception(){ + throw CommException(*fenix.user_world, *fenix.ret_error); +} + +Fenix_Rank_role role(){ + return (Fenix_Rank_role) fenix.role; +} + +int error(){ + return fenix.repair_result; +} + +int nspare(){ + return fenix.spare_ranks; +} + +int callback_register(std::function callback){ + return __fenix_callback_register(callback); +} + +int callback_pop() { + return __fenix_callback_pop(); +} + +void callback_invoke_all() { + __fenix_callback_invoke_all(); +} + +std::vector fail_list(){ + if(fenix.fail_world_size == 0) return {}; + return {fenix.fail_world, fenix.fail_world+fenix.fail_world_size}; +} + +int detect_failures(bool recover){ + return __fenix_detect_failures(recover); +} + +bool initialized(){ + return fenix.fenix_init_flag; +} + +} // namespace Fenix diff --git a/src/fenix_mpi_override.c b/src/fenix_callbacks.cpp similarity index 61% rename from src/fenix_mpi_override.c rename to src/fenix_callbacks.cpp index 3761348..982db02 100644 --- a/src/fenix_mpi_override.c +++ b/src/fenix_callbacks.cpp @@ -53,24 +53,60 @@ // ************************************************************************ //@HEADER */ -#include "fenix_process_recovery.h" -#include "fenix_comm_list.h" -#include + #include -#include "fenix_ext.h" -static inline -int 
__fenix_notify_newcomm(int ret, MPI_Comm *newcomm) +#include "fenix_ext.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_recovery.hpp" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" +#include "fenix_exception.hpp" +#include + +using namespace Fenix; + +int __fenix_callback_register(fenix_callback_func& recover) { - if (ret != MPI_SUCCESS || - !fenix.fenix_init_flag || - *newcomm == MPI_COMM_NULL) return ret; - - if (__fenix_comm_push(newcomm) != FENIX_SUCCESS) { - fprintf(stderr, "[fenix error] Did not manage to push communicator\n"); - PMPI_Comm_free(newcomm); - ret = MPI_ERR_INTERN; - } + if(!fenix.fenix_init_flag) return FENIX_ERROR_UNINITIALIZED; + + fenix.callbacks.push_back(recover); + + return FENIX_SUCCESS; +} + +int __fenix_callback_pop(){ + if(!fenix.fenix_init_flag) return FENIX_ERROR_UNINITIALIZED; + if(fenix.callbacks.empty()) return FENIX_ERROR_CALLBACK_NOT_REGISTERED; + + fenix.callbacks.pop_back(); + + return FENIX_SUCCESS; +} + +void __fenix_callback_invoke_all(){ + //If callbacks are invoked in a nested manner due to caught exceptions + //within a callback, we want to only finish the most recent call. All prior + //calls should exit as soon as control returns. + static int callbacks_depth = 0; + int m_callbacks_layer = callbacks_depth++; + + try { + for(auto& cb : fenix.callbacks) { + if(callbacks_depth != m_callbacks_layer+1) break; + cb(*fenix.user_world, fenix.mpi_fail_code); + } + } catch (const CommException& e) { + switch(fenix.callback_exception_mode){ + case(RETHROW): + if(m_callbacks_layer == 0) callbacks_depth = 0; + throw; + case(SQUASH): + break; + } + } - return ret; + //Reset the callback depth when leaving the outermost call + if(m_callbacks_layer == 0) callbacks_depth = 0; } diff --git a/src/fenix_comm_list.c b/src/fenix_comm_list.c deleted file mode 100644 index d1b56d2..0000000 --- a/src/fenix_comm_list.c +++ /dev/null @@ -1,138 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| _| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock -// -// Questions? Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include -#include - -fenix_comm_list_t my_list = {NULL, NULL}; - -int __fenix_comm_push(MPI_Comm *comm) { - fenix_comm_list_elm_t *current = (fenix_comm_list_elm_t *) malloc(sizeof(fenix_comm_list_elm_t)); - if (!current) return 0; - current->next = NULL; - current->comm = comm; - if (!my_list.tail) { - /* if list was empty, initialize head and tail */ - current->prev = NULL; - my_list.head = my_list.tail = current; - } - else { - /* if list was not empty, add element to the head of the list */ - current->prev = my_list.head; - my_list.head->next = current; - my_list.head = current; - } - return FENIX_SUCCESS; -} - -int __fenix_comm_delete(MPI_Comm *comm) { - - fenix_comm_list_elm_t *current = my_list.tail; - while (current) { - if (*(current->comm) == *comm) { - if (current != my_list.head && current != my_list.tail) { - current->prev->next = current->next; - current->next->prev = current->prev; - } - else if (current == my_list.tail) { - if (current->next) { - current->next->prev = NULL; - my_list.tail = current->next; - } - else my_list.tail = my_list.head = NULL; - } - else { - if (current->prev) { - current->prev->next = NULL; - my_list.head = current->prev; - } - else my_list.tail = my_list.head = NULL; - } - MPIX_Comm_revoke(*comm); - PMPI_Comm_free(comm); - free(current); - return 1; - } - else current = current->next; - } - /* if we end up here, the requested communicator has not been found */ - return 0; -} - - -void __fenix_comm_list_destroy(void) { - if (my_list.tail == NULL) { - return; - } - else { - fenix_comm_list_elm_t *current = my_list.tail; - while (current->next) { - fenix_comm_list_elm_t *new = current->next; - MPIX_Comm_revoke(*current->comm); - PMPI_Comm_free(current->comm); - free(current); - current = new; - } - MPIX_Comm_revoke(*current->comm); - PMPI_Comm_free(current->comm); - free(current); - } - my_list.tail = my_list.head = NULL; -} - diff --git a/src/fenix_data_group.c b/src/fenix_data_group.cpp similarity index 75% rename from src/fenix_data_group.c rename to src/fenix_data_group.cpp index ad453aa..928dbe1 100644 --- a/src/fenix_data_group.c +++ b/src/fenix_data_group.cpp @@ -54,18 +54,53 @@ //@HEADER */ +#include + #include "mpi.h" #include "fenix-config.h" -#include "fenix_ext.h" -#include "fenix_data_group.h" -#include "fenix_data_member.h" -#include "fenix_data_packet.h" +#include "fenix_ext.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_member.hpp" +#include "fenix_data_packet.hpp" +namespace Fenix::Data { +group_iterator find_group(int id){ + return find_group(id, fenix.data_recovery); +} + +group_iterator find_group(int id, 
fenix_data_recovery_t* dr){ + int index = __fenix_search_groupid(id, dr); + if(index == -1){ + debug_print("ERROR: group_id <%d> does not exist\n", id); + return {index, nullptr}; + } + return {index, dr->group[index]}; +} + + +member_iterator fenix_group_t::search_member(int id){ + auto iter = members.find(id); + if(iter == members.end()) return {-1, nullptr}; + assert(iter->first == iter->second.memberid); + return {std::distance(members.begin(), iter), &(iter->second)}; +} + +member_iterator fenix_group_t::find_member(int id){ + auto it = search_member(id); + if(it.first == -1) debug_print("ERROR group <%d>: member_id <%d> does not exist\n", groupid, id); + return it; +} + +std::vector fenix_group_t::get_member_ids(){ + std::vector ret; + ret.reserve(members.size()); + for(const auto& [k, v] : members){ + ret.push_back(k); + } + return ret; +} -/** - * @brief - */ fenix_data_recovery_t * __fenix_data_recovery_init() { fenix_data_recovery_t *data_recovery = (fenix_data_recovery_t *) s_calloc(1, sizeof(fenix_data_recovery_t)); @@ -84,13 +119,12 @@ fenix_data_recovery_t * __fenix_data_recovery_init() { return data_recovery; } -int __fenix_member_delete(int groupid, int memberid) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - if(group_index !=-1){ - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - } +int member_delete(int groupid, int memberid) { + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + auto [member_index, mentry] = group->find_member(memberid); + if(!mentry) return FENIX_ERROR_INVALID_MEMBERID; if (fenix.options.verbose == 38) { verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", @@ -98,38 +132,18 @@ int __fenix_member_delete(int groupid, int memberid) { member_index); } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_delete: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_delete: memberid <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_data_recovery_t *data_recovery = fenix.data_recovery; - fenix_group_t *group = (data_recovery->group[group_index]); - - retval = group->vtbl.member_delete(group, memberid); - - if(retval == FENIX_SUCCESS){ - fenix_member_t *member = group->member; - member->count--; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->state = DELETED; - } + int retval = group->member_delete(memberid); - if (fenix.options.verbose == 38) { - fenix_member_t *member = group->member; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-state: %d", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member->count, mentry->state); - } + if(retval == FENIX_SUCCESS){ + group->members.erase(memberid); + } - retval = FENIX_SUCCESS; + if (fenix.options.verbose == 38) { + verbose_print("c-rank: %d, role: %d, m-count: %zu", + __fenix_get_current_rank(fenix.new_world), fenix.role, + group->members.size()); } + return retval; } @@ -147,11 +161,13 @@ int __fenix_group_delete_direct(fenix_group_t* group){ //knowing how many members there are during its own deletion //process. 
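As a quick reference, the lookup pattern these new helpers are intended for, mirroring the member_delete and group_delete call sites nearby; a minimal sketch with placeholder ids:

    #include "fenix.h"
    #include "fenix_data_group.hpp"
    using namespace Fenix::Data;

    int lookup_sketch(int groupid, int memberid) {
      auto [group_index, group] = find_group(groupid);
      if (!group) return FENIX_ERROR_INVALID_GROUPID;    // not found: {-1, nullptr}

      auto [member_index, mentry] = group->find_member(memberid);
      if (!mentry) return FENIX_ERROR_INVALID_MEMBERID;  // find_member also logs

      (void)group_index; (void)member_index; (void)mentry;
      return FENIX_SUCCESS;
    }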
- return group->vtbl.group_delete(group); + return group->group_delete(); } -int __fenix_data_recovery_remove_group(fenix_data_recovery_t* data_recovery, int group_index){ +int __fenix_data_recovery_remove_group(int group_index){ int retval = !FENIX_SUCCESS; + auto data_recovery = fenix.data_recovery; + if(group_index != -1){ for(int index = group_index; index < data_recovery->count-1; index++){ data_recovery->group[index] = data_recovery->group[index+1]; @@ -166,29 +182,22 @@ int __fenix_data_recovery_remove_group(fenix_data_recovery_t* data_recovery, int * @brief * @param group_id */ -int __fenix_group_delete(int groupid) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); +int group_delete(int groupid) { + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; if (fenix.options.verbose == 37) { verbose_print("c-rank: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), group_index); } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_group_delete: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - /* Delete Process */ - fenix_data_recovery_t *data_recovery = fenix.data_recovery; - fenix_group_t *group = (data_recovery->group[group_index]); - retval = __fenix_group_delete_direct(group); - - if(retval == FENIX_SUCCESS){ - retval = __fenix_data_recovery_remove_group(data_recovery, group_index); - } + /* Delete Process */ + int retval = __fenix_group_delete_direct(group); + if(retval == FENIX_SUCCESS){ + retval = __fenix_data_recovery_remove_group(group_index); } + return retval; } @@ -271,3 +280,5 @@ int __fenix_find_next_group_position( fenix_data_recovery_t *data_recovery ) { __fenix_ensure_data_recovery_capacity(data_recovery); return data_recovery->count; } + +} //end namespace Fenix::Data diff --git a/src/fenix_data_member.c b/src/fenix_data_member.c deleted file mode 100644 index 3d9d60d..0000000 --- a/src/fenix_data_member.c +++ /dev/null @@ -1,291 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| _| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Michael Heroux, and Matthew Whitlock -// -// Questions? Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ - -#include "mpi.h" -#include "fenix-config.h" -#include "fenix_ext.h" -#include "fenix_data_recovery.h" -#include "fenix_data_member.h" -#include "fenix_data_packet.h" - - -/** - * @brief - */ -fenix_member_t *__fenix_data_member_init() { - fenix_member_t *member = (fenix_member_t *) - s_calloc(1, sizeof(fenix_member_t)); - member->count = 0; - member->total_size = __FENIX_DEFAULT_MEMBER_SIZE; - member->member_entry = (fenix_member_entry_t *) s_malloc( - __FENIX_DEFAULT_MEMBER_SIZE * sizeof(fenix_member_entry_t)); - - if (fenix.options.verbose == 42) { - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, member->count, - member->total_size); - } - - int member_index; - for (member_index = 0; member_index < - __FENIX_DEFAULT_MEMBER_SIZE; member_index++) { // insert default values - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->memberid = -1; - mentry->state = EMPTY; - - if (fenix.options.verbose == 42) { - verbose_print("c-rank: %d, role: %d, m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - mentry->memberid, mentry->state); - } - } - return member; -} - -void __fenix_data_member_destroy( fenix_member_t *member ) { - free( member->member_entry ); - free( member ); -} - -/** - * @brief - * @param - * @param - */ -int __fenix_search_memberid(fenix_member_t* member, int key) { - fenix_data_recovery_t *data_recovery = fenix.data_recovery; - int member_index, found = -1, index = -1; - for (member_index = 0; - (found != 1) && (member_index < member->total_size); member_index++) { - - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - if (!(mentry->state == EMPTY || mentry->state == DELETED) && key == mentry->memberid) { - index = member_index; - found = 1; - } - } - return index; -} - - -/** - * @brief - * @param - */ -int __fenix_find_next_member_position(fenix_member_t *member) { - __fenix_ensure_member_capacity(member); - - int member_index, found = -1, index = -1; - for (member_index = 0; - (found != 1) && (member_index < member->total_size); member_index++) { - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - if (mentry->state == EMPTY || mentry->state == DELETED) { - index = member_index; - found = 1; - } - } - return index; -} - -fenix_member_entry_t* __fenix_data_member_add_entry(fenix_member_t* member, - int memberid, void* data, int count, int datatype_size){ - - int member_index = __fenix_find_next_member_position(member); - fenix_member_entry_t* mentry = member->member_entry + member_index; - - mentry->memberid = memberid; - mentry->state = 
OCCUPIED; - mentry->user_data = data; - mentry->current_count = count; - mentry->datatype_size = datatype_size; - - member->count++; - - return mentry; -} - -/** - * @brief - * @param - */ -void __fenix_ensure_member_capacity(fenix_member_t *m) { - fenix_member_t *member = m; - if (member->count +1 >= member->total_size) { - int start_index = member->total_size; - member->member_entry = (fenix_member_entry_t *) s_realloc(member->member_entry, - (member->total_size * 2) * - sizeof(fenix_member_entry_t)); - member->total_size = member->total_size * 2; - - if (fenix.options.verbose == 52) { - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member->count, member->total_size); - } - - int member_index; - for (member_index = start_index; member_index < member->total_size; member_index++) { - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->memberid = -1; - mentry->state = EMPTY; - - if (fenix.options.verbose == 52) { - verbose_print( - "c-rank: %d, role: %d, member[%d] m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member_index, mentry->memberid, mentry->state); - } - } - } -} - - -int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank){ - int retval = -1; - - fenix_data_recovery_t* data_recovery = fenix.data_recovery; - int group_index = __fenix_search_groupid(groupid, data_recovery); - int member_index; - if(group_index != -1){ - member_index = __fenix_search_memberid( - data_recovery->group[group_index]->member, memberid); - } - - if(group_index == -1){ - debug_print("ERROR Fenix_Data_member_delete: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if(member_index == -1){ - debug_print("ERROR Fenix_Data_member_delete: memberid <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = data_recovery->group[group_index]; - fenix_member_entry_t mentry = group->member->member_entry[member_index]; - - fenix_member_entry_packet_t packet; - packet.memberid = mentry.memberid; - packet.datatype_size = mentry.datatype_size; - packet.current_count = mentry.current_count; - - MPI_Send(&packet, sizeof(packet), MPI_BYTE, dest_rank, RECOVER_MEMBER_ENTRY_TAG^groupid, - group->comm); - - retval = FENIX_SUCCESS; - } - - return retval; -} - -int __fenix_data_member_recv_metadata(int groupid, int src_rank, - fenix_member_entry_packet_t* packet){ - int retval = -1; - - fenix_data_recovery_t* data_recovery = fenix.data_recovery; - int group_index = __fenix_search_groupid(groupid, data_recovery); - - if(group_index == -1){ - debug_print("ERROR Fenix_Data_member_delete: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t* group = data_recovery->group[group_index]; - - MPI_Recv((void*)packet, sizeof(fenix_member_entry_packet_t), MPI_BYTE, src_rank, - RECOVER_MEMBER_ENTRY_TAG^groupid, group->comm, NULL); - - retval = FENIX_SUCCESS; - } - - - return retval; -} - - -/** - * @brief - * @param - * @param - */ -void __fenix_data_member_reinit(fenix_member_t *m, fenix_two_container_packet_t packet, - enum states mystatus) { - fenix_member_t *member = m; - int start_index = member->total_size; - member->count = 0; - member->total_size = packet.total_size; - member->member_entry = (fenix_member_entry_t *) s_realloc(member->member_entry, - (member->total_size) * - sizeof(fenix_member_entry_t)); - 
if (fenix.options.verbose == 50) { - verbose_print("c-rank: %d, role: %d, m-count: %zu, m-size: %zu\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - member->count, member->total_size); - } - - int member_index; - /* Why start_index is set to the number of member entries ? */ - // for (member_index = start_index; member_index < member->size; member_index++) { - for (member_index = 0; member_index < member->total_size; member_index++) { - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - mentry->memberid = -1; - mentry->state = mystatus; - if (fenix.options.verbose == 50) { - verbose_print("c-rank: %d, role: %d, m-memberid: %d, m-state: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, - mentry->memberid, mentry->state); - } - } -} diff --git a/src/fenix_callbacks.c b/src/fenix_data_member.cpp similarity index 52% rename from src/fenix_callbacks.c rename to src/fenix_data_member.cpp index 8779402..b17e7a0 100644 --- a/src/fenix_callbacks.c +++ b/src/fenix_data_member.cpp @@ -45,7 +45,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock +// Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -54,83 +54,86 @@ //@HEADER */ -#include - -#include "fenix_comm_list.h" -#include "fenix_ext.h" -#include "fenix_process_recovery.h" -#include "fenix_data_group.h" -#include "fenix_data_recovery.h" -#include "fenix_opt.h" -#include "fenix_util.h" -#include - - -int __fenix_callback_register(void (*recover)(MPI_Comm, int, void *), void *callback_data) -{ - int error_code = FENIX_SUCCESS; - if (fenix.fenix_init_flag) { - fenix_callback_func *fp = s_malloc(sizeof(fenix_callback_func)); - fp->x = recover; - fp->y = callback_data; - __fenix_callback_push( &fenix.callback_list, fp); - } else { - error_code = FENIX_ERROR_UNINITIALIZED; - } - return error_code; -} +#include "mpi.h" +#include "fenix-config.h" +#include "fenix_ext.hpp" +#include "fenix_data_recovery.hpp" +#include "fenix_data_member.hpp" +#include "fenix_data_packet.hpp" -int __fenix_callback_pop(){ - if(!fenix.fenix_init_flag) return FENIX_ERROR_UNINITIALIZED; - if(fenix.callback_list == NULL) return FENIX_ERROR_CALLBACK_NOT_REGISTERED; - - fenix_callback_list_t* old_head = fenix.callback_list; - fenix.callback_list = old_head->next; - free(old_head->callback); - free(old_head); +namespace Fenix::Data { - return FENIX_SUCCESS; +fenix_member_entry_packet_t +fenix_member_entry_t::to_packet(){ + fenix_member_entry_packet_t to_ret; + to_ret.memberid = memberid; + to_ret.datatype_size = datatype_size; + to_ret.current_count = current_count; + return to_ret; } -void __fenix_callback_invoke_all(int error) -{ - fenix_callback_list_t *current = fenix.callback_list; - while (current != NULL) { - (current->callback->x)((MPI_Comm) fenix.new_world, error, - (void *) current->callback->y); - current = current->next; - } +/** + * @brief + * @param + * @param + */ +int __fenix_search_memberid(fenix_group_t* group, int key) { + return group->search_member(key).first; } -void __fenix_callback_push(fenix_callback_list_t **head, fenix_callback_func *fp) -{ - fenix_callback_list_t *callback = malloc(sizeof(fenix_callback_list_t)); - callback->callback = fp; - callback->next = *head; - *head = callback; + +fenix_member_entry_t* __fenix_data_member_add_entry(fenix_group_t* 
group, + int memberid, void* data, int count, int datatype_size){ + fenix_member_entry_t mentry; + mentry.memberid = memberid; + mentry.state = OCCUPIED; + mentry.user_data = (char*)data; + mentry.current_count = count; + mentry.datatype_size = datatype_size; + group->members[memberid] = mentry; + + return &group->members[memberid]; } -int __fenix_callback_destroy(fenix_callback_list_t *callback_list) -{ - int error_code = FENIX_SUCCESS; +int __fenix_data_member_send_metadata(int groupid, int memberid, int dest_rank){ + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; - if ( fenix.fenix_init_flag ) { + auto [member_index, member] = group->find_member(memberid); + if(!member) return FENIX_ERROR_INVALID_MEMBERID; - fenix_callback_list_t *current = callback_list; + fenix_member_entry_packet_t packet; + packet.memberid = member->memberid; + packet.datatype_size = member->datatype_size; + packet.current_count = member->current_count; - while (current != NULL) { - fenix_callback_list_t *old; - old = current; - current = current->next; - free( old->callback ); - free( old ); - } + MPI_Send(&packet, sizeof(packet), MPI_BYTE, dest_rank, RECOVER_MEMBER_ENTRY_TAG^groupid, + group->comm); + + return FENIX_SUCCESS; +} + +int __fenix_data_member_recv_metadata(int groupid, int src_rank, + fenix_member_entry_packet_t* packet){ + auto group = find_group(groupid).second; + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + MPI_Recv((void*)packet, sizeof(fenix_member_entry_packet_t), MPI_BYTE, src_rank, + RECOVER_MEMBER_ENTRY_TAG^groupid, group->comm, NULL); + + return FENIX_SUCCESS; +} - } else { - error_code = FENIX_ERROR_UNINITIALIZED; - } - return error_code; +/** + * @brief + * @param + * @param + */ +void __fenix_data_member_reinit(fenix_group_t *group, fenix_two_container_packet_t packet, + enum states mystatus) { + group->members.clear(); } +} //namespace Fenix::Data diff --git a/src/fenix_data_policy.c b/src/fenix_data_policy.cpp similarity index 92% rename from src/fenix_data_policy.c rename to src/fenix_data_policy.cpp index 603aff1..f0de9c4 100644 --- a/src/fenix_data_policy.c +++ b/src/fenix_data_policy.cpp @@ -55,16 +55,14 @@ */ #include -#include "fenix_data_policy_in_memory_raid.h" -#include "fenix_data_policy.h" -#include "fenix_data_group.h" -#include "fenix_opt.h" -#include "fenix_ext.h" +#include "fenix_data_policy_in_memory_raid.hpp" +#include "fenix_data_policy.hpp" +#include "fenix_data_group.hpp" +#include "fenix_opt.hpp" #include "fenix.h" -/** - *@brief - **/ +namespace Fenix::Data { + int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, int timestart, int depth, int policy_name, void* policy_value, int* flag){ @@ -72,7 +70,7 @@ int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, switch (policy_name){ case FENIX_DATA_POLICY_IN_MEMORY_RAID: - __fenix_policy_in_memory_raid_get_group(group, comm, timestart, + IMR::__fenix_policy_in_memory_raid_get_group(group, comm, timestart, depth, policy_value, flag); retval = FENIX_SUCCESS; break; @@ -84,3 +82,5 @@ int __fenix_policy_get_group(fenix_group_t** group, MPI_Comm comm, return retval; } + +} diff --git a/src/fenix_data_policy_in_memory_raid.c b/src/fenix_data_policy_in_memory_raid.c deleted file mode 100644 index 897f163..0000000 --- a/src/fenix_data_policy_in_memory_raid.c +++ /dev/null @@ -1,1256 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| 
_| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Michael Heroux, and Matthew Whitlock -// -// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include "fenix.h" -#include "fenix_ext.h" -#include "fenix_opt.h" -#include "fenix_data_subset.h" -#include "fenix_data_recovery.h" -#include "fenix_data_policy.h" -#include "fenix_data_group.h" -#include "fenix_data_member.h" - -#define __FENIX_IMR_DEFAULT_MENTRY_NUM 10 -#define __FENIX_IMR_NO_MEMBERS 16000 -#define __IMR_RECOVER_DATA_REGION_TAG 97854 - -#define STORE_PAYLOAD_TAG 2004 - -int __imr_group_delete(fenix_group_t* group); -int __imr_member_create(fenix_group_t* group, fenix_member_entry_t* mentry); -int __imr_member_delete(fenix_group_t* group, int member_id); -int __imr_get_redundant_policy(fenix_group_t*, int* policy_name, - void* policy_value, int* flag); -int __imr_member_store(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); -int __imr_member_storev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier); -int __imr_member_istore(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request); -int __imr_member_istorev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request); -int __imr_commit(fenix_group_t* group); -int __imr_snapshot_delete(fenix_group_t* group, int time_stamp); -int __imr_barrier(fenix_group_t* group); -int __imr_member_restore(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); -int __imr_member_lrestore(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - Fenix_Data_subset* data_found); -int __imr_member_restore_from_rank(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - int source_rank); -int __imr_member_get_attribute(fenix_group_t* group, fenix_member_entry_t* member, - int attributename, void* attributevalue, int* flag, int sourcerank); -int __imr_member_set_attribute(fenix_group_t* group, fenix_member_entry_t* member, - int attributename, void* attributevalue, int* flag); -int __imr_get_number_of_snapshots(fenix_group_t* group, - int* number_of_snapshots); -int __imr_get_snapshot_at_position(fenix_group_t* group, int position, - int* time_stamp); -int __imr_reinit(fenix_group_t* group, int* flag); - -typedef struct __fenix_imr_mentry{ - void** data; - Fenix_Data_subset* data_regions; - int* timestamp; - int current_head; - int memberid; -} fenix_imr_mentry_t; - -typedef struct __fenix_imr_group{ - fenix_group_t base; - int raid_mode; - int rank_separation; - int* partners; - int set_size; - MPI_Comm set_comm; - int entries_size; - int entries_count; - fenix_imr_mentry_t* entries; - int num_snapshots; - int* timestamps; -} fenix_imr_group_t; - -typedef struct __fenix_imr_undo_log{ - int groupid, memberid; -} fenix_imr_undo_log_t; - -void __imr_sync_timestamps(fenix_imr_group_t* group); - -void __imr_undo_restore(MPI_Comm comm, int err, void* data){ - fenix_imr_undo_log_t* undo_log = (fenix_imr_undo_log_t*)data; - - Fenix_Data_member_delete(undo_log->groupid, undo_log->memberid); - - free(data); - Fenix_Callback_pop(); //Should be this callback itself. 
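A minimal sketch of the undo-log pattern this callback implements, assuming the public Fenix_Callback_register / Fenix_Callback_pop / Fenix_Data_member_delete entry points behave as in the calls elsewhere in this patch; undo_restore and arm_undo are illustrative names, not in-tree symbols:

#include <mpi.h>
#include "fenix.h"

// Record of the member being rebuilt, heap-allocated so it outlives the caller.
struct undo_log { int groupid; int memberid; };

// If another failure hits before the restore finishes, drop the half-rebuilt
// member so it is never mistaken for valid data, then pop this callback itself.
static void undo_restore(MPI_Comm comm, int error, void* data) {
    undo_log* log = static_cast<undo_log*>(data);
    Fenix_Data_member_delete(log->groupid, log->memberid);
    delete log;
    Fenix_Callback_pop();
}

// Arm the undo callback just before a multi-message recovery begins; on
// success the caller pops it again and frees the log.
static undo_log* arm_undo(int groupid, int memberid) {
    undo_log* log = new undo_log{groupid, memberid};
    Fenix_Callback_register(undo_restore, log);
    return log;
}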
-} - - -void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, - int timestart, int depth, void* policy_value, int* flag){ - *group = (fenix_group_t *)malloc(sizeof(fenix_imr_group_t)); - fenix_imr_group_t *new_group = (fenix_imr_group_t *)(*group); - new_group->base.vtbl.group_delete = *__imr_group_delete; - new_group->base.vtbl.member_create = *__imr_member_create; - new_group->base.vtbl.member_delete = *__imr_member_delete; - new_group->base.vtbl.get_redundant_policy = *__imr_get_redundant_policy; - new_group->base.vtbl.member_store = *__imr_member_store; - new_group->base.vtbl.member_storev = *__imr_member_storev; - new_group->base.vtbl.member_istore = *__imr_member_istore; - new_group->base.vtbl.member_istorev = *__imr_member_istorev; - new_group->base.vtbl.commit = *__imr_commit; - new_group->base.vtbl.snapshot_delete = *__imr_snapshot_delete; - new_group->base.vtbl.barrier = *__imr_barrier; - new_group->base.vtbl.member_restore = *__imr_member_restore; - new_group->base.vtbl.member_lrestore = *__imr_member_lrestore; - new_group->base.vtbl.member_restore_from_rank = *__imr_member_restore_from_rank; - new_group->base.vtbl.member_get_attribute = *__imr_member_get_attribute; - new_group->base.vtbl.member_set_attribute = *__imr_member_set_attribute; - new_group->base.vtbl.get_number_of_snapshots = *__imr_get_number_of_snapshots; - new_group->base.vtbl.get_snapshot_at_position = *__imr_get_snapshot_at_position; - new_group->base.vtbl.reinit = *__imr_reinit; - - int* policy_vals = (int*)policy_value; - new_group->raid_mode = policy_vals[0]; - new_group->rank_separation = policy_vals[1]; - - int my_rank, comm_size; - MPI_Comm_size(comm, &comm_size); - MPI_Comm_rank(comm, &my_rank); - - if(new_group->raid_mode == 1){ - //Set up the person who's data I am storing as partner 0 - //Set up the person who is storing my data as partner 1 - new_group->partners = (int*) malloc(sizeof(int) * 2); - - //odd-sized groups take some extra handling. - bool isOdd = ((comm_size%2) != 0); - - int remaining_size = comm_size; - if(isOdd) remaining_size -= 3; - - //We want to form groups of rank_separation*2 to pair within - int n_full_groups = remaining_size / (new_group->rank_separation*2); - - //We don't always get what we want though, one group may need to be smaller. - int mini_group_size = (remaining_size - n_full_groups*new_group->rank_separation*2)/2; - - - int start_rank = mini_group_size + (isOdd?1:0); - int mid_rank = comm_size/2; //Only used when isOdd - - int end_mini_group_start = comm_size-mini_group_size-(isOdd?1:0); - int start_mini_group_start = (isOdd?1:0); - bool in_start_mini=false, in_end_mini=false; - - if(my_rank >= start_mini_group_start && my_rank < start_mini_group_start+mini_group_size){ - in_start_mini = true; - } else if(my_rank >= end_mini_group_start && my_rank < comm_size-(isOdd?1:0)){ - in_end_mini = true; - } - - //Allocate the "normal" ranks - if(my_rank >= start_rank && my_rank < end_mini_group_start && (!isOdd || my_rank != mid_rank)){ - //"effective" rank for determining which group I'm in and if I look forward or backward for a partner. - int e_rank = my_rank - start_rank; - if(isOdd && my_rank > mid_rank) --e_rank; //We skip the middle rank when isOdd - - int my_partner; - if(((e_rank/new_group->rank_separation)%2) == 0){ - //Look forward for partner. 
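The partner assignment here is the subtle part of RAID-1 setup. A reduced sketch for the easy case, an even communicator whose size is an exact multiple of 2*rank_separation; the surrounding code additionally handles odd sizes and the leftover mini groups:

#include <cassert>

// Even-indexed blocks of rank_separation consecutive ranks look forward by
// rank_separation for a partner, odd-indexed blocks look backward, so the
// pairing is symmetric: buddy_of(buddy_of(r)) == r.
int buddy_of(int rank, int comm_size, int rank_separation) {
    assert(comm_size % (2 * rank_separation) == 0);
    int block = rank / rank_separation;
    return (block % 2 == 0) ? rank + rank_separation
                            : rank - rank_separation;
}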
- my_partner = my_rank + new_group->rank_separation; - if(isOdd && my_rank < mid_rank && my_partner >= mid_rank) ++my_partner; - } else { - my_partner = my_rank - new_group->rank_separation; - if(isOdd && my_rank > mid_rank && my_partner <= mid_rank) --my_partner; - } - - new_group->partners[0] = my_partner; - new_group->partners[1] = my_partner; - } else if(in_start_mini) { - int e_rank = my_rank - start_mini_group_start; - int partner = end_mini_group_start + e_rank; - new_group->partners[0] = partner; - new_group->partners[1] = partner; - } else if(in_end_mini) { - int e_rank = my_rank - end_mini_group_start; - int partner = start_mini_group_start + e_rank; - new_group->partners[0] = partner; - new_group->partners[1] = partner; - } else { //Only things left are the three ranks that must be paired to handle odd-sized comms - if(my_rank == 0){ - new_group->partners[0] = comm_size-1; - new_group->partners[1] = mid_rank; - } else if(my_rank == mid_rank){ - new_group->partners[0] = 0; - new_group->partners[1] = comm_size-1; - } else if(my_rank == comm_size-1){ - new_group->partners[0] = mid_rank; - new_group->partners[1] = 0; - } else { - fprintf(stderr, "FENIX_IMR Fatal error: Rank <%d> no partner assigned, this is a bug in IMR!\n", my_rank); - *flag = FENIX_ERROR_GROUP_CREATE; - return; - } - } - - - } else if(new_group->raid_mode == 5){ - new_group->set_size = policy_vals[2]; - new_group->partners = (int*) malloc(sizeof(int) * new_group->set_size); - - //User is responsible for giving values that "make sense" for set size and rank separation given a comm size. - int my_set_pos = (my_rank/new_group->rank_separation)%new_group->set_size; - for(int index = 0; index < new_group->set_size; index++){ - new_group->partners[index] = (comm_size + my_rank - (new_group->rank_separation * (my_set_pos-index)))%comm_size; - } - - //Build a comm to use for all of the set's reductions we'll need to do for RAID 5. - MPI_Group comm_group, set_group; - MPI_Comm_group(comm, &comm_group); - MPI_Group_incl(comm_group, new_group->set_size, new_group->partners, &set_group); - MPI_Comm_create_group(comm, set_group, 0, &(new_group->set_comm)); - - } - - new_group->entries_size = __FENIX_IMR_DEFAULT_MENTRY_NUM; - new_group->entries_count = 0; - new_group->entries = - (fenix_imr_mentry_t*) malloc(sizeof(fenix_imr_mentry_t) * __FENIX_IMR_DEFAULT_MENTRY_NUM); - new_group->num_snapshots = 0; - new_group->timestamps = (int*)malloc(sizeof(int)*depth); - - new_group->base.comm = comm; - new_group->base.current_rank = my_rank; - __imr_sync_timestamps(new_group); - *flag = FENIX_SUCCESS; -} - -//Sets mentry to point to the right index for a given memberid -//If there are no members, the mentry pointer will be invalid and __FENIX_IMR_NO_MEMBERS will be returned. -//If the given memberid is not found, points to the closest and returns anything but FENIX_SUCCESS. -int __imr_find_mentry(fenix_imr_group_t* group, int memberid, fenix_imr_mentry_t** mentry){ - //List is sorted by member id, do binary search. 
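An equivalent formulation of this search using std::lower_bound over the same sorted-by-memberid layout; MEntry and the return convention are illustrative, not the in-tree types:

#include <vector>
#include <algorithm>

struct MEntry { int memberid; /* payload omitted */ };

// Returns the index of memberid, or -1 with *closest set to the nearest slot
// (or -1 again when the table is empty).
int find_entry(const std::vector<MEntry>& entries, int memberid, int* closest) {
    auto it = std::lower_bound(entries.begin(), entries.end(), memberid,
        [](const MEntry& e, int id) { return e.memberid < id; });
    *closest = (it == entries.end()) ? (int)entries.size() - 1
                                     : (int)(it - entries.begin());
    if (it != entries.end() && it->memberid == memberid)
        return (int)(it - entries.begin());
    return -1;
}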
- int retval = -1; - unsigned lower_bound = 0; - unsigned upper_bound = group->entries_count - 1; - - - if(group->entries_count == 0){ - upper_bound = 0; - retval = __FENIX_IMR_NO_MEMBERS; - } - - while(lower_bound != upper_bound){ - unsigned to_check = (lower_bound + upper_bound)>>1; - - if(group->entries[to_check].memberid == memberid){ - lower_bound = upper_bound = to_check; - } else if(group->entries[to_check].memberid < memberid){ - lower_bound = to_check + 1; - if(lower_bound > upper_bound) lower_bound = upper_bound; - } else { - upper_bound = to_check - 1; - if(lower_bound > upper_bound) upper_bound = lower_bound; - } - } - - *mentry = group->entries + lower_bound; - if(retval != __FENIX_IMR_NO_MEMBERS && (*mentry)->memberid == memberid){ - retval = FENIX_SUCCESS; - } - return retval; -} - -void __imr_alloc_data_region(void** region, int raid_mode, int local_data_size, int set_size){ - if(raid_mode == 1){ - *region = (void*) malloc(2*local_data_size); - } else if(raid_mode == 5){ - //We need space for our own local data, as well as space for the parity data - //We add two just in case the data size isn't evenly divisible by set_size-1 - // 3 is needed because making the parity one larger on some nodes requires - // extra bits of "data" on the other nodes - *region = (void*) malloc(local_data_size + local_data_size/(set_size - 1) + 3); - } else { - debug_print("Error: raid mode <%d> not supported\n", raid_mode); - } -} - -int __imr_member_create(fenix_group_t* g, fenix_member_entry_t* mentry){ - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - int retval = -1; - - fenix_imr_mentry_t* closest_imr_mentry; - int found_memberid = __imr_find_mentry(group, mentry->memberid, &closest_imr_mentry); - - if(found_memberid == FENIX_SUCCESS){ - debug_print("Error Fenix_Data_member_create: member_id <%d> already exists in this policy\n", - mentry->memberid); - } else { - //Double check that we have room for the member. - if(group->entries_count >= group->entries_size){ - group->entries = (fenix_imr_mentry_t*) s_realloc(group->entries, - group->entries_size * 2 * sizeof(fenix_imr_mentry_t)); - group->entries_size *= 2; - } - - fenix_imr_mentry_t* new_imr_mentry; - if(found_memberid == __FENIX_IMR_NO_MEMBERS){ - //This is the first member, it goes at the beginning. - new_imr_mentry = group->entries; - } else { - int closest_index = closest_imr_mentry - group->entries; - //Do we want to place this new member before or after - //the closest member? - if(mentry->memberid > closest_imr_mentry->memberid){ - //Move all entries after the closest to one farther to right, b/c I belong - //right after the closest. - memmove(group->entries+closest_index +1, group->entries+closest_index +2, - group->entries_size - closest_index+1); - new_imr_mentry = group->entries+closest_index+1; - } else { - //Move all entries starting w/ closest to one farther to right, b/c I belong - //right before the closest. - memmove(group->entries+closest_index, group->entries+closest_index +1, - group->entries_size - closest_index); - new_imr_mentry = group->entries+closest_index; - } - } - - //Now I've got the location to store this member, - //so I just need to actually fill in the data. 
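For reference, the generic shape of the sorted-array insertion performed above; memmove counts bytes, so element counts have to be scaled by sizeof(T):

#include <cstring>

// Open a hole at position pos in a sorted array of count elements (capacity
// already checked by the caller) and drop the new value into it.
// T is assumed to be trivially copyable.
template <typename T>
void insert_sorted_at(T* array, int count, int pos, const T& value) {
    std::memmove(array + pos + 1, array + pos,
                 (size_t)(count - pos) * sizeof(T));
    array[pos] = value;
}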
- new_imr_mentry->current_head = 0; - new_imr_mentry->memberid = mentry->memberid; - - new_imr_mentry->data = (void**) malloc( (group->base.depth+2) * sizeof(void*)); - int local_data_size = mentry->datatype_size * mentry->current_count; - new_imr_mentry->data_regions = - (Fenix_Data_subset *)malloc(sizeof(Fenix_Data_subset) * (group->base.depth+2) ); - new_imr_mentry->timestamp = (int*) malloc(sizeof(int) * (group->base.depth + 2)); - - for(int i = 0; i < group->base.depth + 2; i++){ - __imr_alloc_data_region(new_imr_mentry->data + i, group->raid_mode, local_data_size, group->set_size); - - //Initialize to smallest # blocks allowed. - __fenix_data_subset_init(1, new_imr_mentry->data_regions + i); - new_imr_mentry->data_regions[i].specifier = __FENIX_SUBSET_EMPTY; - } - - group->entries_count++; - - retval = FENIX_SUCCESS; - } - - return retval; -} - -void __imr_member_free(fenix_imr_mentry_t* mentry, int depth){ - //Start by clearing out the mentry's data pointers. - for(int i = 0; i < depth + 2; i++){ - __fenix_data_subset_free(mentry->data_regions + i); - free(mentry->data[i]); - } - - free(mentry->data); - free(mentry->data_regions); - free(mentry->timestamp); -} - -int __imr_member_delete(fenix_group_t* g, int member_id){ - int retval = FENIX_SUCCESS; - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - //Find the member first - fenix_imr_mentry_t *mentry; - int found_member = __imr_find_mentry(group, member_id, &mentry); - - if(found_member != FENIX_SUCCESS){ - debug_print("ERROR Fenix_Data_member_delete: member_id <%d> does not exist!\n", - member_id); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - - //Free all of the pointers in the mentry - __imr_member_free(mentry, group->base.depth); - - //Now shift all the subsequent mentries back one, unless I'm already the last one. - int member_index = mentry - group->entries; - if(member_index != (group->entries_count-1) ){ - memmove(mentry, mentry+1, group->entries_count - 1 - member_index); - } - - group->entries_count--; - } - return retval; -} - - - -int __imr_member_store(fenix_group_t* g, int member_id, - Fenix_Data_subset subset_specifier){ - int retval = -1; - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - - fenix_imr_mentry_t* mentry; - int found_member = __imr_find_mentry(group, member_id, &mentry); - - fenix_member_entry_t* member_data; - //Shouldn't need to check for failure to find the member, that should be done before - //calling - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = &(group->base.member->member_entry[member_data_index]); - - if(found_member != FENIX_SUCCESS){ - debug_print("ERROR Fenix_Data_member_store: member_id <%d> does not exist on rank <%d>!\n", - member_id, g->current_rank); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - //Copy my own data, trade data with partner, update data region - //Store my data at the beginning of the member's buffer, resiliency data after that. 
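A minimal sketch of the buddy exchange the RAID-1 store performs, for the full-subset case; the code below serializes only the stored subset and sizes the messages from it, and the names here are illustrative:

#include <mpi.h>
#include <cstring>

// Keep my bytes in the front half of the entry buffer and my partner's bytes
// in the back half; one MPI_Sendrecv swaps the copies.
void buddy_store(const char* my_data, int nbytes, char* entry,
                 int store_partner, int fetch_partner, int tag, MPI_Comm comm) {
    std::memcpy(entry, my_data, nbytes);                       // local copy
    MPI_Sendrecv(entry, nbytes, MPI_BYTE, store_partner, tag,
                 entry + nbytes, nbytes, MPI_BYTE, fetch_partner, tag,
                 comm, MPI_STATUS_IGNORE);
    // entry now holds [ my data | partner's data ] for this snapshot slot.
}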
- __fenix_data_subset_copy_data(&subset_specifier, mentry->data[mentry->current_head], - member_data->user_data, member_data->datatype_size, member_data->current_count); - - if(group->raid_mode == 1){ - - size_t serialized_size; - void* serialized = __fenix_data_subset_serialize(&subset_specifier, - mentry->data[mentry->current_head], member_data->datatype_size, - member_data->current_count, &serialized_size); - - void* recv_buf = malloc(serialized_size * member_data->datatype_size); - - MPI_Sendrecv(serialized, serialized_size * member_data->datatype_size, MPI_BYTE, - group->partners[1], group->base.groupid ^ STORE_PAYLOAD_TAG, - recv_buf, serialized_size * member_data->datatype_size, MPI_BYTE, - group->partners[0], group->base.groupid ^ STORE_PAYLOAD_TAG, - group->base.comm, NULL); - - //Expand the serialized data out and store into the partner's portion of this data entry. - __fenix_data_subset_deserialize(&subset_specifier, recv_buf, - ((uint8_t*)mentry->data[mentry->current_head]) + member_data->datatype_size*member_data->current_count, - member_data->current_count, member_data->datatype_size); - - free(recv_buf); - free(serialized); - - retval = FENIX_SUCCESS; - } else if(group->raid_mode == 5){ - //TODO: Try to optimize for partial commits - currently does parity on the whole region regardless of commit area. - //TODO: I'm not sure if this is the best way to do this - could be a bottleneck if this is unoptimized since this - // could be running on a lot of data. - - //Why does this do it this way? - //In order to do recovery on a given block of data, we need to be missing only 1 of: - // all of the data in the corresponding blocks and the parity for those blocks - //Standard RAID does this by having one disk store parity for a given block instead of data, but this assumes - // that there is no benefit to data locality - in our case we want each node to have a local copy of its own - // data, preferably in a single (virtually) continuous memory range for data movement optimization. So we'll - // store the local data, then put 1/N of the parity data at the bottom of the commit. - //The weirdness comes from the fact that a given node CANNOT contribute to the data being checked for parity which - // will be stored on itself. IE, a node cannot save both a portion of the data and the parity for that data portion - - // doing so would mean if that node fails it is as if we lost two nodes for recovery semantics, making every failure - // non-recoverable. - // This means we need to do an XOR reduction across every node but myself, then store the result on myself - this is - // a little awkward with MPI's reductions which require full comm participation and do not receive any information about - // the source of a given chunk of data (IE we can't exclude data from node X, as we want to). - //This is easily doable using MPI send/recvs, but doing it that way neglects all of the data/comm size optimizations, - // as well as any block XOR optimizations from MPI's reduction operations. - //We could do something like an alltoallv to send appropriate data to each node, then let them calculate parity info locally - // However, we have to either allocate space to hold an extra copy of the entire data size, or we overwrite our - // local buffer and have to re-distribute the data afterward. - //I think the best way to handle it will be to manipulate the XOR function. We will do a reduction which uses local data - // that we do not actually want involved in calculating the parity. 
Then, we will XOR the local data with the result - // to get the accurate parity info. - // This involves computing the XOR on an extra 2/(set_size-1)*parity_size of data, but minimizes excess memory allocation - // and network use. Scales well with higher set sizes. - int parity_size = (member_data->datatype_size * member_data->current_count)/(group->set_size - 1); - int remainder = (member_data->datatype_size * member_data->current_count)%(group->set_size - 1); - - if(remainder != 0) remainder++; - - void* data_buf = mentry->data[mentry->current_head]; - //store parity info after my data in data region. - //we always have a spare data buffer byte for rounding stuff, so store after that as well. - void* parity_buf = (void*)((char*)data_buf + member_data->datatype_size*member_data->current_count + 2); - - int my_set_rank; - MPI_Comm_rank(group->set_comm, &my_set_rank); - int offset = 0, rounding_compensator; - for(int i = 0; i < group->set_size; i++){ - //Last node is an edge case. - if((my_set_rank == group->set_size-1) && i==my_set_rank){ - offset = 0; - } - - MPI_Reduce((char*)data_buf + offset, parity_buf, parity_size + (i < remainder ? 1 : 0), MPI_BYTE, - MPI_BXOR, i, group->set_comm); - if(i != my_set_rank){ - offset += parity_size + (i < remainder ? 1 : 0); - } - } - - //Each node has buffer which contains parity^some_local_data, so now pull parity from that. - offset = my_set_rank * parity_size + (my_set_rank < remainder ? my_set_rank : remainder); - - //As above, last node is an edge case. - if(my_set_rank == group->set_size - 1){ - offset = 0; - } - - //Utilize MPI's local XOR function, assuming it is more optimized than a naive implementation would be. - MPI_Reduce_local((void*)((char*)data_buf + offset), parity_buf, parity_size + (my_set_rank < remainder ? 1 : 0), - MPI_BYTE, MPI_BXOR); - - //Finally, each node has the right stuff. - - retval = FENIX_SUCCESS; - } else { - debug_print("ERROR Fenix_Data_member_store: Raid mode <%d> is not supported yet!\n", - group->raid_mode); - retval = FENIX_ERROR_UNINITIALIZED; - } - - //Make sure to update which data regions this entry contains. - __fenix_data_subset_merge_inplace(mentry->data_regions + mentry->current_head, &subset_specifier); - - } - - - return retval; -} - - - - - -int __imr_member_storev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier){return 0;} -int __imr_member_istore(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request){return 0;} -int __imr_member_istorev(fenix_group_t* group, int member_id, - Fenix_Data_subset subset_specifier, Fenix_Request *request){return 0;} - - - -int __imr_commit(fenix_group_t* g){ - //No sources of error for this one yet. - int to_return = FENIX_SUCCESS; - - fenix_imr_group_t *group = (fenix_imr_group_t*)g; - - if(group->num_snapshots == group->base.depth+1){ - //Full of timestamps, remove the oldest and proceed as normal. - memcpy(group->timestamps, group->timestamps+1, group->base.depth); - group->num_snapshots--; - } - group->timestamps[group->num_snapshots++] = group->base.timestamp; - - - //For each entry id (eid) - for(int eid = 0; eid < group->entries_count; eid++){ - fenix_imr_mentry_t *mentry = &group->entries[eid]; - - if(mentry->current_head == group->base.depth + 1){ - //The entry is full, one snapshot should be shifted out. 
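A sketch of the pointer shuffle this branch performs: the oldest snapshot's allocation is recycled as the new staging slot rather than freed and reallocated. Slot is a stand-in for the per-snapshot buffers used here:

#include <vector>
#include <algorithm>

struct Slot { std::vector<char> buf; int timestamp = -1; };

// Rotate the oldest slot to the back and mark it empty; its buffer is reused
// as the staging area for the next store.
void rotate_out_oldest(std::vector<Slot>& slots) {
    std::rotate(slots.begin(), slots.begin() + 1, slots.end());
    slots.back().timestamp = -1;
}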
- - //Save this pointer to reuse the allocated memory - void* first_data = mentry->data[0]; - - for(int snapshot = 0; snapshot < group->base.depth + 1; snapshot++){ - //lightweight movement, just moving the pointers about. - mentry->data[snapshot] = mentry->data[snapshot + 1]; - __fenix_data_subset_deep_copy(mentry->data_regions + snapshot + 1, - mentry->data_regions + snapshot); - mentry->timestamp[snapshot] = mentry->timestamp[snapshot + 1]; - } - - mentry->data[group->base.depth + 1] = first_data; - mentry->data_regions[group->base.depth + 1].specifier = __FENIX_SUBSET_EMPTY; - mentry->current_head--; - } - - mentry->timestamp[mentry->current_head++] = group->base.timestamp; - } - - return to_return; -} - - -int __imr_snapshot_delete(fenix_group_t* g, int time_stamp){ - int retval = FENIX_SUCCESS; - - fenix_imr_group_t *group = (fenix_imr_group_t*)g; - - for(int entry_id = 0; entry_id < group->entries_count && retval == FENIX_SUCCESS; entry_id++){ - //Search for the timestamp in each group. Given how commits and deletes work, we know - //the snapshots are sorted by timestamp in the arrays. - fenix_imr_mentry_t mentry = group->entries[entry_id]; - - //current_head is the staging area's entry, so start before that and work backwards. - //We'll work backwards under the assumption that snapshots are likely to be deleted soon after creation. - // (Does this assumption seem valid?) - for(int snapshot = mentry.current_head - 1; snapshot >= 0 && retval == FENIX_SUCCESS; snapshot--){ - if(mentry.timestamp[snapshot] < time_stamp){ - retval = FENIX_ERROR_INVALID_TIMESTAMP; - - } else if(mentry.timestamp[snapshot] == time_stamp){ - void* old_data = mentry.data[snapshot]; - - for(int to_shift = snapshot; to_shift < mentry.current_head; to_shift++){ - mentry.timestamp[to_shift] = mentry.timestamp[to_shift + 1]; - __fenix_data_subset_deep_copy(mentry.data_regions + to_shift+1, - mentry.data_regions + to_shift); - mentry.data[to_shift] = mentry.data[to_shift + 1]; - } - mentry.data[mentry.current_head] = old_data; - mentry.data_regions[mentry.current_head].specifier = __FENIX_SUBSET_EMPTY; - - mentry.current_head--; - break; - } - } - } - - if(retval == FENIX_SUCCESS){ - group->num_snapshots--; - } - - return retval; -} - - - -int __imr_barrier(fenix_group_t* group){return 0;} - - -int __imr_get_number_of_snapshots(fenix_group_t* group, - int* number_of_snapshots){ - return ((fenix_imr_group_t*)group)->num_snapshots; -} - -int __imr_get_snapshot_at_position(fenix_group_t* g, int position, - int* time_stamp){ - int retval = -1; - - fenix_imr_group_t *group = (fenix_imr_group_t*)g; - - if(!(position < group->num_snapshots)){ - retval = FENIX_ERROR_INVALID_POSITION; - } else { - //Each member ought to have the same snapshots, in the same order. - //If this isn't true, some other bug has occurred. Thus, we will just - //query the first member. - *time_stamp = group->entries[0].timestamp[group->entries[0].current_head - 1 - position]; - retval = FENIX_SUCCESS; - } - - return retval; -} - - -int __imr_member_restore(fenix_group_t* g, int member_id, - void* target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found){ - int retval = -1; - - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - //One-time fix after a reinit. - if(group->base.timestamp == -1 && group->num_snapshots > 0) - group->base.timestamp = group->timestamps[group->num_snapshots-1]; - - fenix_imr_mentry_t* mentry; - //find_mentry returns the error status. We found the member (and corresponding data) if there are no errors. 
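A reduced sketch of the survivor handshake used by the RAID-1 branch below: each rank publishes whether its own copy survived and learns the same from both partners before deciding who needs to be re-fed data. Tag values and names are illustrative:

#include <mpi.h>

void exchange_found(int i_found_mine, int* found_from_p0, int* found_from_p1,
                    int partner0, int partner1, MPI_Comm comm) {
    // Send toward partner 0, receive from partner 1, then flip directions.
    MPI_Sendrecv(&i_found_mine, 1, MPI_INT, partner0, 600,
                 found_from_p1, 1, MPI_INT, partner1, 600,
                 comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&i_found_mine, 1, MPI_INT, partner1, 601,
                 found_from_p0, 1, MPI_INT, partner0, 601,
                 comm, MPI_STATUS_IGNORE);
}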
- int found_member = !(__imr_find_mentry(group, member_id, &mentry)); - - fenix_member_entry_t member_data; - if(found_member){ - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = group->base.member->member_entry[member_data_index]; - } - - int recovery_locally_possible; - - fenix_imr_undo_log_t* undo_data; //Used for undoing partial restores interrupted by failures. - - if(group->raid_mode == 1){ - int my_data_found, partner_data_found; - - //We need to know if both partners found their data. - //First send to partner 0 and recv from partner 1, then flip. - MPI_Sendrecv(&found_member, 1, MPI_INT, group->partners[0], PARTNER_STATUS_TAG, - &my_data_found, 1, MPI_INT, group->partners[1], PARTNER_STATUS_TAG, - group->base.comm, NULL); - MPI_Sendrecv(&found_member, 1, MPI_INT, group->partners[1], PARTNER_STATUS_TAG, - &partner_data_found, 1, MPI_INT, group->partners[0], PARTNER_STATUS_TAG, - group->base.comm, NULL); - - - if(found_member && partner_data_found && my_data_found){ - //I have my data, and the person who's data I am backing up has theirs. We're good to go. - retval = FENIX_SUCCESS; - } else if (!found_member && (!my_data_found || !partner_data_found)){ - //I lost my data, and my partner doesn't have a copy for me to restore from. - debug_print("ERROR Fenix_Data_member_restore: member_id <%d> does not exist at <%d> or partner(s) <%d> <%d>\n", - member_id, group->base.current_rank, group->partners[0], group->partners[1]); - - retval = FENIX_ERROR_INVALID_MEMBERID; - } else if(found_member){ - //My partner(s) need info on this member. This policy does nothing special w/ extra input params, so - //I can just send the basic member metadata. - if(!partner_data_found) - __fenix_data_member_send_metadata(group->base.groupid, member_id, group->partners[0]); - - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - //send data region info next - if(!partner_data_found) - __fenix_data_subset_send(mentry->data_regions + snapshot, group->partners[0], - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->base.comm); - - size_t size; - void* toSend; - //send my data, to maintain resiliency on my data - if(!my_data_found){ - toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, - mentry->data[snapshot], member_data.datatype_size, member_data.current_count, - &size); - MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[1], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); - free(toSend); - } - - //send their data - if(!partner_data_found){ - toSend = __fenix_data_subset_serialize(mentry->data_regions+snapshot, - ((char*)mentry->data[snapshot]) + member_data.datatype_size*member_data.current_count, - member_data.datatype_size, member_data.current_count, &size); - MPI_Send(toSend, member_data.datatype_size*size, MPI_BYTE, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm); - free(toSend); - } - - } - - } else if(!found_member) { - //I need info on this member. - fenix_member_entry_packet_t packet; - __fenix_data_member_recv_metadata(group->base.groupid, group->partners[1], &packet); - - //We remake the new member just like the user would. - __fenix_member_create(group->base.groupid, packet.memberid, NULL, packet.current_count, - packet.datatype_size); - - //Mark the member for deletion if another failure interrupts recovering fully. 
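The metadata handoff above ships the member description as raw bytes, so sender and receiver must agree on the struct layout. A sketch of the receiving side; the field names mirror fenix_member_entry_packet_t in this patch, but the exact field types and the tag are assumptions:

#include <mpi.h>

struct member_packet { int memberid; int datatype_size; int current_count; };

// Pull the packet from the surviving partner; the recovering rank then
// recreates the member from these three fields before any payload arrives.
void recv_member_metadata(member_packet* packet, int src, MPI_Comm comm) {
    MPI_Recv(packet, (int)sizeof(*packet), MPI_BYTE, src, 900, comm,
             MPI_STATUS_IGNORE);
}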
- undo_data = (fenix_imr_undo_log_t*)malloc(sizeof(fenix_imr_undo_log_t)); - undo_data->groupid = group->base.groupid; - undo_data->memberid = member_id; - Fenix_Callback_register(__imr_undo_restore, (void*)undo_data); - - __imr_find_mentry(group, member_id, &mentry); - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = group->base.member->member_entry[member_data_index]; - - mentry->current_head = group->num_snapshots; - - //now recover data. - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - mentry->timestamp[snapshot] = group->timestamps[snapshot]; - - __fenix_data_subset_free(mentry->data_regions+snapshot); - __fenix_data_subset_recv(mentry->data_regions+snapshot, group->partners[1], - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->base.comm); - - int recv_size = __fenix_data_subset_data_size(mentry->data_regions + snapshot, - member_data.current_count); - - if(recv_size > 0){ - void* recv_buf = malloc(member_data.datatype_size * recv_size); - //first receive their data, so store in the resiliency section. - MPI_Recv(recv_buf, recv_size*member_data.datatype_size, MPI_BYTE, group->partners[0], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm, NULL); - __fenix_data_subset_deserialize(mentry->data_regions + snapshot, recv_buf, - ((char*)mentry->data[snapshot]) + member_data.current_count*member_data.datatype_size, - member_data.current_count, member_data.datatype_size); - - //Now receive my data. - MPI_Recv(recv_buf, recv_size*member_data.datatype_size, MPI_BYTE, group->partners[1], - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->base.comm, NULL); - __fenix_data_subset_deserialize(mentry->data_regions + snapshot, recv_buf, - mentry->data[snapshot], member_data.current_count, member_data.datatype_size); - - free(recv_buf); - } - } - - //Member restored fully, so we don't need to mark it for undoing on failure. - Fenix_Callback_pop(); - free(undo_data); - } - - - recovery_locally_possible = found_member || (my_data_found && partner_data_found); - if(recovery_locally_possible) retval = FENIX_SUCCESS; - - } else if (group->raid_mode == 5){ - int* set_results = malloc(sizeof(int) * group->set_size); - MPI_Allgather((void*)&found_member, 1, MPI_INT, (void*)set_results, 1, MPI_INT, - group->set_comm); - - int recovering_node = -1, recovery_possible = 1; - for(int i = 0; i < group->set_size; i++){ - if(!set_results[i]){ - - if(recovering_node == -1){ - recovering_node = i; - } else { - recovery_possible = 0; - break; - } - - } - } - - free(set_results); - - //If we have a recovering node, and recovery is possible, do it - if((recovering_node != -1) && recovery_possible){ - int my_set_rank; - MPI_Comm_rank(group->set_comm, &my_set_rank); - - //The recovering node needs metadata on this member, just needs it from one partner. - if((recovering_node == 0 && my_set_rank == 1) || my_set_rank == 0){ - //I'm the node that's going to send metadata - - //This function pulls comm from the base group - so we need to give - //dest_rank in terms of that comm - __fenix_data_member_send_metadata(group->base.groupid, member_id, group->partners[recovering_node]); - - //Now my partner will need all of the entries. First they'll need to know how many snapshots - //to expect. - MPI_Send((void*) &(group->num_snapshots), 1, MPI_INT, recovering_node, - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm); - - //They also need the timestamps for each snapshot, as well as the value for the next. 
- MPI_Send((void*)mentry->timestamp, group->num_snapshots+1, MPI_INT, recovering_node, - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm); - - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - __fenix_data_subset_send(mentry->data_regions + snapshot, recovering_node, - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->set_comm); - } - - } else if(!found_member) { - //I'm the one that needs the info. - fenix_member_entry_packet_t packet; - __fenix_data_member_recv_metadata(group->base.groupid, group->partners[my_set_rank==0 ? 1 : 0], &packet); - - //We remake the new member just like the user would. - __fenix_member_create(group->base.groupid, packet.memberid, NULL, packet.current_count, - packet.datatype_size); - - //Mark the member for deletion if another failure interrupts recovering fully. - undo_data = (fenix_imr_undo_log_t*)malloc(sizeof(fenix_imr_undo_log_t)); - undo_data->groupid = group->base.groupid; - undo_data->memberid = member_id; - Fenix_Callback_register(__imr_undo_restore, (void*)undo_data); - - - __imr_find_mentry(group, member_id, &mentry); - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - member_data = group->base.member->member_entry[member_data_index]; - - - MPI_Recv((void*)&(group->num_snapshots), 1, MPI_INT, (my_set_rank==0 ? 1 : 0), - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm, NULL); - - mentry->current_head = group->num_snapshots; - - //We also need to explicitly ask for all timestamps, since user may have deleted some and caused mischief. - MPI_Recv((void*)(mentry->timestamp), group->num_snapshots + 1, MPI_INT, (my_set_rank==0 ? 1 : 0), - RECOVER_MEMBER_ENTRY_TAG^group->base.groupid, group->set_comm, NULL); - - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - __fenix_data_subset_free(mentry->data_regions+snapshot); - __fenix_data_subset_recv(mentry->data_regions+snapshot, (my_set_rank==0 ? 1 : 0), - __IMR_RECOVER_DATA_REGION_TAG ^ group->base.groupid, group->set_comm); - } - } - - for(int snapshot = 0; snapshot < group->num_snapshots; snapshot++){ - //Similar to the process of doing a store, we're going to end up XORing with noisy data from - //the recovering node, then XORing with it again to get what we actually want. - int parity_size = (member_data.datatype_size*member_data.current_count)/(group->set_size-1); - int remainder = (member_data.datatype_size*member_data.current_count)%(group->set_size-1); - - if(remainder > 0) remainder++; - - void* data_buf = mentry->data[snapshot]; - void* parity_buf = (void*)((char*)data_buf + member_data.datatype_size*member_data.current_count + 2); - - int offset = 0; - for(int i = 0; i < group->set_size; i++){ - //Make sure to send the (out of order) parity info on the correct grouping - void* toSend; - if(i == my_set_rank){ - if(my_set_rank != recovering_node){ - toSend = parity_buf; - } else { - toSend = data_buf; - } - } else { - if(my_set_rank != recovering_node){ - toSend = (void*)((char*)data_buf + offset); - } else { - toSend = parity_buf; - } - } - - void* recv_buf = (i == my_set_rank ? parity_buf : (void*)((char*)data_buf + offset)); - - MPI_Reduce(toSend, recv_buf, parity_size + (1set_comm); - - if(my_set_rank == recovering_node){ - //Remove the random data I had to send from the result. 
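The cancellation step below relies on XOR being its own inverse: the rank had to contribute a buffer to the set-wide MPI_Reduce even though its bytes are stale, so XOR-ing that same buffer into the result removes its contribution. A minimal sketch:

#include <mpi.h>

// a ^ (a ^ x) == x: one more BXOR with the stale local input leaves exactly
// the combination of everyone else's data in reduced_bytes.
void cancel_local_contribution(const void* my_stale_bytes, void* reduced_bytes,
                               int nbytes) {
    MPI_Reduce_local(my_stale_bytes, reduced_bytes, nbytes, MPI_BYTE, MPI_BXOR);
}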
- MPI_Reduce_local(toSend, recv_buf, parity_size + (i does not exist at <%d> and is not recoverable from RAID-5 set\n", - member_id, group->base.current_rank); - - retval = FENIX_ERROR_INVALID_MEMBERID; - recovery_locally_possible = 0; - } else { - retval = FENIX_SUCCESS; - recovery_locally_possible = 1; - } - - - - } else { - debug_print("ERROR Fenix_Data_member_store: Raid mode <%d> is not supported yet!\n", - group->raid_mode); - retval = FENIX_ERROR_UNINITIALIZED; - recovery_locally_possible = 0; - } - - - - //Now that we've ensured everyone has data, restore from it. - - int return_found_data; - if(data_found == NULL){ - data_found = (Fenix_Data_subset*) malloc(sizeof(Fenix_Data_subset)); - return_found_data = 0; - } else { - return_found_data = 1; - } - __fenix_data_subset_init(1, data_found); - - //Don't try to restore if we weren't able to get the relevant data. - if(recovery_locally_possible && target_buffer != NULL){ - data_found->specifier = __FENIX_SUBSET_EMPTY; - - int oldest_snapshot; - for(oldest_snapshot = (mentry->current_head - 1); oldest_snapshot >= 0; oldest_snapshot--){ - __fenix_data_subset_merge_inplace(data_found, mentry->data_regions + oldest_snapshot); - - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - //The snapshots have formed a full set of data, not need to add older snapshots. - break; - } - } - - //If there isn't a full set of data, don't try to pull from nonexistent snapshot. - if(oldest_snapshot == -1){ - oldest_snapshot = 0; - } - - for(int i = oldest_snapshot; i < mentry->current_head; i++){ - __fenix_data_subset_copy_data(&mentry->data_regions[i], target_buffer, - mentry->data[i], member_data.datatype_size, member_data.current_count); - } - - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - retval = FENIX_SUCCESS; - } else { - retval = FENIX_WARNING_PARTIAL_RESTORE; - } - } else { - data_found->specifier = __FENIX_SUBSET_EMPTY; - } - - if(!return_found_data){ - __fenix_data_subset_free(data_found); - free(data_found); - } - - //Dont forget to clear the commit buffer - if(recovery_locally_possible) mentry->data_regions[mentry->current_head].specifier = __FENIX_SUBSET_EMPTY; - - - return retval; -} - -int __imr_member_lrestore(fenix_group_t* g, int member_id, - void* target_buffer, int max_count, int time_stamp, Fenix_Data_subset* data_found){ - int retval = -1; - - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - - fenix_imr_mentry_t* mentry; - //find_mentry returns the error status. We found the member (and corresponding data) if there are no errors. - int found_member = !(__imr_find_mentry(group, member_id, &mentry)); - - if(!found_member){ - return FENIX_ERROR_INVALID_MEMBERID; - } - - int member_data_index = __fenix_search_memberid(group->base.member, member_id); - fenix_member_entry_t member_data = group->base.member->member_entry[member_data_index]; - - - - int return_found_data; - if(data_found == NULL){ - data_found = (Fenix_Data_subset*) malloc(sizeof(Fenix_Data_subset)); - return_found_data = 0; - } else { - return_found_data = 1; - } - __fenix_data_subset_init(1, data_found); - - data_found->specifier = __FENIX_SUBSET_EMPTY; - - - int oldest_snapshot; - for(oldest_snapshot = (mentry->current_head - 1); oldest_snapshot >= 0; oldest_snapshot--){ - __fenix_data_subset_merge_inplace(data_found, mentry->data_regions + oldest_snapshot); - - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - //The snapshots have formed a full set of data, not need to add older snapshots. 
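The layering rule in this loop is easy to get backwards: regions are merged newest-to-oldest only until they cover the whole member, and the selected snapshots are then applied oldest-to-newest so newer stores win. A self-contained sketch with a bool-per-element stand-in for Fenix_Data_subset:

#include <vector>
#include <algorithm>

struct Snapshot {
    std::vector<bool> written;   // which elements this snapshot stored
    std::vector<int>  data;      // their values (unwritten slots unused)
};

// Returns true if the union of selected snapshots covered every element.
// Assumes each snapshot's vectors match target.size().
bool restore_layers(const std::vector<Snapshot>& snaps /* oldest..newest */,
                    std::vector<int>& target) {
    std::vector<bool> covered(target.size(), false);
    int oldest = 0;
    for (int i = (int)snaps.size() - 1; i >= 0; i--) {        // newest first
        oldest = i;
        for (size_t e = 0; e < target.size(); e++)
            covered[e] = covered[e] || snaps[i].written[e];
        if (std::all_of(covered.begin(), covered.end(), [](bool b){ return b; }))
            break;                                            // full coverage
    }
    for (int i = oldest; i < (int)snaps.size(); i++)          // oldest first
        for (size_t e = 0; e < target.size(); e++)
            if (snaps[i].written[e]) target[e] = snaps[i].data[e];
    return std::all_of(covered.begin(), covered.end(), [](bool b){ return b; });
}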
- break; - } - } - - //If there isn't a full set of data, don't try to pull from nonexistent snapshot. - if(oldest_snapshot == -1){ - oldest_snapshot = 0; - } - - for(int i = oldest_snapshot; i < mentry->current_head; i++){ - __fenix_data_subset_copy_data(&mentry->data_regions[i], target_buffer, - mentry->data[i], member_data.datatype_size, member_data.current_count); - } - - if(__fenix_data_subset_is_full(data_found, member_data.current_count)){ - retval = FENIX_SUCCESS; - } else { - retval = FENIX_WARNING_PARTIAL_RESTORE; - } - - //Dont forget to clear the commit buffer - mentry->data_regions[mentry->current_head].specifier = __FENIX_SUBSET_EMPTY; - - return retval; - -} - - -int __imr_member_restore_from_rank(fenix_group_t* group, int member_id, - void* target_buffer, int max_count, int time_stamp, - int source_rank){return 0;} - - -int __imr_member_get_attribute(fenix_group_t* group, fenix_member_entry_t* member, - int attributename, void* attributevalue, int* flag, int sourcerank){return 0;} - -int __imr_member_set_attribute(fenix_group_t* g, fenix_member_entry_t* member, - int attributename, void* attributevalue, int* flag){ - //No mutable attributes (as of now) require any changes to this policy's info - return FENIX_SUCCESS; -} - -int __imr_reinit(fenix_group_t* g, int* flag){ - fenix_imr_group_t* group = (fenix_imr_group_t*)g; - - if(group->raid_mode == 5){ - //Rebuild the set comm to re-include the failed node(s). - MPI_Group comm_group, set_group; - MPI_Comm_group(g->comm, &comm_group); - MPI_Group_incl(comm_group, group->set_size, group->partners, &set_group); - MPI_Comm_create_group(g->comm, set_group, 0, &(group->set_comm)); - } - - __imr_sync_timestamps(group); - - *flag = FENIX_SUCCESS; - - return FENIX_SUCCESS; -} - -void __imr_sync_timestamps(fenix_imr_group_t* group){ - int n_snapshots = group->num_snapshots; - - if(group->raid_mode == 1){ - int partner_snapshots; - MPI_Sendrecv(&n_snapshots, 1, MPI_INT, group->partners[0], 34560, - &partner_snapshots, 1, MPI_INT, group->partners[1], 34560, - group->base.comm, MPI_STATUS_IGNORE); - n_snapshots = n_snapshots > partner_snapshots ? n_snapshots : partner_snapshots; - - MPI_Sendrecv(&n_snapshots, 1, MPI_INT, group->partners[1], 34561, - &partner_snapshots, 1, MPI_INT, group->partners[0], 34561, - group->base.comm, MPI_STATUS_IGNORE); - n_snapshots = n_snapshots > partner_snapshots ? n_snapshots : partner_snapshots; - } else { - MPI_Allreduce(MPI_IN_PLACE, &n_snapshots, 1, MPI_INT, MPI_MAX, group->set_comm); - } - - bool need_reset = group->num_snapshots != n_snapshots; - for(int i = group->num_snapshots; i < n_snapshots; i++) group->timestamps[i] = -1; - - if(group->raid_mode == 1){ - int* p0_stamps = (int*)malloc(sizeof(int)*n_snapshots); - int* p1_stamps = (int*)malloc(sizeof(int)*n_snapshots); - - MPI_Sendrecv(group->timestamps, n_snapshots, MPI_INT, group->partners[1], 34562, - p0_stamps, n_snapshots, MPI_INT, group->partners[0], 34562, - group->base.comm, MPI_STATUS_IGNORE); - MPI_Sendrecv(group->timestamps, n_snapshots, MPI_INT, group->partners[0], 34563, - p1_stamps, n_snapshots, MPI_INT, group->partners[1], 34563, - group->base.comm, MPI_STATUS_IGNORE); - - for(int i = 0; i < n_snapshots; i++){ - int old_stamp = group->timestamps[i]; - group->timestamps[i] = group->timestamps[i] > p0_stamps[i] ? group->timestamps[i] : p0_stamps[i]; - group->timestamps[i] = group->timestamps[i] > p1_stamps[i] ? 
group->timestamps[i] : p1_stamps[i]; - - need_reset |= group->timestamps[i] != old_stamp; - } - - free(p0_stamps); - free(p1_stamps); - } else { - MPI_Allreduce(MPI_IN_PLACE, group->timestamps, n_snapshots, MPI_INT, MPI_MAX, group->set_comm); - } - - group->num_snapshots = n_snapshots; - if(n_snapshots > 0) group->base.timestamp = group->timestamps[n_snapshots-1]; - else group->base.timestamp = -1; - - //Now fix members - if(need_reset && group->entries_count > 0) { - if(fenix.options.verbose == 1){ - verbose_print("Outdated timestamps on rank %d. All members will require full recovery.\n", - group->base.current_rank); - } - //For now, just delete all members and assume partner(s) can - //help me rebuild fully consistent state - for(int i = group->entries_count-1; i >= 0; i--){ - int memberid = group->entries[i].memberid; - Fenix_Data_member_delete(group->base.groupid, memberid); - } - } -} - -int __imr_get_redundant_policy(fenix_group_t* group, int* policy_name, - void* policy_value, int* flag){ - int retval = FENIX_SUCCESS; - *policy_name = FENIX_DATA_POLICY_IN_MEMORY_RAID; - - fenix_imr_group_t* full_group = (fenix_imr_group_t *)group; - int* policy_vals = (int*) policy_value; - policy_vals[0] = full_group->raid_mode; - policy_vals[1] = full_group->rank_separation; - - *flag = FENIX_SUCCESS; - return retval; -} - -int __imr_group_delete(fenix_group_t* g){ - fenix_imr_group_t* group = (fenix_imr_group_t*) g; - - for(int entry = 0; entry < group->base.member->count; entry++){ - __imr_member_free(group->entries+entry, g->depth); - } - free(group->entries); - - //We have the responsibility of destroying the member array in the base group struct. - __fenix_data_member_destroy(group->base.member); - - free(group->partners); - free(group); - return FENIX_SUCCESS; -} diff --git a/src/fenix_data_policy_in_memory_raid.cpp b/src/fenix_data_policy_in_memory_raid.cpp new file mode 100644 index 0000000..aa62c67 --- /dev/null +++ b/src/fenix_data_policy_in_memory_raid.cpp @@ -0,0 +1,943 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "fenix.h" +#include "fenix_ext.hpp" +#include "fenix_opt.hpp" +#include "fenix_data_subset.h" +#include "fenix_data_recovery.hpp" +#include "fenix_data_policy.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_member.hpp" +#include "fenix_data_policy_in_memory_raid.hpp" + +#define __FENIX_IMR_DEFAULT_MENTRY_NUM 10 +#define __FENIX_IMR_NO_MEMBERS 16000 +#define __IMR_RECOVER_DATA_REGION_TAG 97854 + +#define STORE_PAYLOAD_TAG 2004 + +namespace Fenix::Data::IMR { + +void __fenix_policy_in_memory_raid_get_group(fenix_group_t** group, MPI_Comm comm, + int timestart, int depth, void* policy_value, int* flag){ + *group = new Group(comm, timestart, depth, (int*)policy_value, flag); +}; + +Entry::Entry(int size, int max_count) + : elm_size(size), elm_max_count(max_count) { + buf.reserve(size * max_count); +} + +Entry::Entry(Entry&& other){ + *this = std::move(other); +} + +Entry& Entry::operator=(Entry&& other){ + timestamp = std::exchange(other.timestamp, -2); + + region = std::move(other.region); + partner_region = std::move(partner_region); + buf = std::move(other.buf); + partner_buf = std::move(other.partner_buf); + elm_size = other.elm_size; + elm_max_count = other.elm_max_count; + return *this; +} + +char* Entry::data(){ return buf.data(); } +void Entry::resize(int size){ buf.resize(size); } +int Entry::size(){ return buf.size(); } + +char* Entry::partner_data(){ return partner_buf.data(); } +void Entry::partner_resize(int size){ partner_buf.resize(size); } +int Entry::partner_size(){ return partner_buf.size(); } + +void Entry::add_and_fit(const DataSubset& subset){ + region += subset; + + int new_count = elm_max_count; + if(!new_count) region.max_count(); + if(!new_count) new_count = subset.max_count(); + + int new_size = new_count*elm_size; + if(new_size > buf.size()) buf.resize(new_size); +} + +void Entry::partner_add_and_fit(const DataSubset& subset){ + partner_region += subset; + + int new_count = elm_max_count; + if(!new_count) partner_region.max_count(); + if(!new_count) new_count = subset.max_count(); + + int new_size = new_count*elm_size; + if(new_size > partner_buf.size()) partner_buf.resize(new_size); +} + +void Entry::reset(){ + timestamp = -2; + + buf.clear(); + partner_buf.clear(); + + region = {}; + partner_region = {}; +} + +Member::Member(fenix_member_entry_t& my_mentry, Group& my_group) + : mentry(my_mentry), group(my_group), send_buf(group.send_buf), + recv_buf(group.recv_buf) +{ + for(int i = 0; i < group.depth+2; i++){ + entries.emplace_back(mentry.datatype_size, mentry.current_count); + } +} + 
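One reading of Entry::add_and_fit for the resizeable-member case (elm_max_count of zero): the staging buffer grows to cover the union of regions stored so far instead of being sized up front. A stand-alone sketch, with merged_max_count playing the role of region.max_count():

#include <vector>
#include <algorithm>

struct EntrySketch {
    std::vector<char> buf;
    int elm_size = 1;
    int merged_max_count = 0;   // highest element index stored so far, plus one

    void add_and_fit(int subset_max_count) {
        merged_max_count = std::max(merged_max_count, subset_max_count);
        size_t needed = (size_t)merged_max_count * elm_size;
        if (needed > buf.size()) buf.resize(needed);
    }
};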
+BuddyMember::BuddyMember(fenix_member_entry_t& my_mentry, Group& my_group) + : Member(my_mentry, my_group) { + for(auto& entry : entries) + entry.partner_resize(mentry.datatype_size*mentry.current_count); +} + +ParityMember::ParityMember(fenix_member_entry_t& my_mentry, Group& my_group) + : Member(my_mentry, my_group) { + int data_len = (mentry.datatype_size*mentry.current_count); + int parity_len = data_len / (group.set_size-1); + + int remainder = data_len % (group.set_size-1); + if(remainder) remainder++; + if(remainder < group.set_rank) parity_len++; + + for(auto& entry : entries) + entry.partner_resize(parity_len); +} + + +bool Member::snapshot_delete(int timestamp){ + bool found = false; + for(int i = entries.size(); i >= 0; i--){ + if(entries[i].timestamp == timestamp){ + assert(!found); + found = true; + entries[i].reset(); + } + //Move deleted snapshot to front + if(found && i > 0){ + std::swap(entries[i], entries[i-1]); + } + } + return found; +} + +int Member::storev(const DataSubset& subset){ + Entry& e = entries.back(); + e.add_and_fit(subset); + subset.copy_data(e.elm_size, e.elm_max_count, mentry.user_data, e.buf); + return this->storev_impl(subset); +} + +int BuddyMember::exch( + const DataSubset& subset, const DataSubset& partner_subset +){ + const int rank = group.set_rank; + const int left = rank == 0 ? group.set_size-1 : rank-1; + const int right = rank == group.set_size-1 ? 0 : rank+1; + + Entry& e = entries.back(); + e.partner_add_and_fit(partner_subset); + + int recv_count = partner_subset.count(e.elm_max_count-1); + recv_buf.reset(e.elm_size*recv_count); + + subset.serialize_data(e.elm_size, e.buf, send_buf); + + MPI_Sendrecv( + send_buf.data(), send_buf.size(), MPI_BYTE, right, 0, + recv_buf.data(), recv_buf.size(), MPI_BYTE, left, 0, + group.set_comm, MPI_STATUS_IGNORE + ); + + partner_subset.deserialize_data( + e.elm_size, recv_buf, e.partner_buf + ); + return FENIX_SUCCESS; +} + +int BuddyMember::storev_impl(const DataSubset& subset){ + //My partner ranks (within set_comm) + const int rank = group.set_rank; + const int left = rank == 0 ? group.set_size-1 : rank-1; + const int right = rank == group.set_size-1 ? 0 : rank+1; + + DataBuffer send_buf, recv_buf; + subset.serialize(send_buf); + + for(int i = 0; i < group.set_size; i++){ + if(i == rank) send_buf.send(right, 0, group.set_comm); + if(i == left) recv_buf.recv_unknown(left, 0, group.set_comm); + } + + return exch(subset, DataSubset(recv_buf)); +} + +int Member::store(const DataSubset& subset){ + Entry& e = entries.back(); + e.add_and_fit(subset); + subset.copy_data(e.elm_size, e.elm_max_count, mentry.user_data, e.buf); + return this->store_impl(subset); +} + +int BuddyMember::store_impl(const DataSubset& subset){ + return exch(subset, subset); +} + +int ParityMember::store_impl(const DataSubset& subset){ + Entry& entry = entries.back(); + + int parity_size = entry.size()/(group.set_size - 1); + int remainder = entry.size()%(group.set_size - 1); + + //If we have any remainder, treat as if we have one more, since a rank + //storing a larger parity block wasn't able to store a larger data block, so + //all such ranks need one extra larger data block. 
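The block sizing used throughout the RAID-5 paths, pulled out as a sketch; it mirrors the arithmetic of the original C implementation of member_store, where a non-zero remainder gives the set members below the adjusted remainder a one-byte-larger block so every data byte is covered:

// Returns the base parity block size and the size of this rank's own block.
struct ParitySizing { int base; int mine; };

ParitySizing parity_sizing(int data_len, int set_size, int set_rank) {
    int base      = data_len / (set_size - 1);
    int remainder = data_len % (set_size - 1);
    if (remainder) remainder++;   // one extra "large" block, as in the code here
    int mine = base + (set_rank < remainder ? 1 : 0);
    return {base, mine};
}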
+ if(remainder) remainder++; + + int m_parity_size = parity_size; + if(group.set_rank entry.size()){ + //Since we pretend to have an extra remainder if there is any + assert(remainder); + assert(group.set_rank >= remainder); + assert(offset+len == entry.size()+1); + offset--; + } + input = entry.data()+offset; + offset += len; + } + + MPI_Reduce( + MPI_IN_PLACE, input, len, MPI_BYTE, MPI_BXOR, i, group.set_comm + ); + } + + assert(offset == entry.size()); + return FENIX_SUCCESS; +} + +void Member::commit(int timestamp){ + entries.back().timestamp = timestamp; + + Entry oldest = std::move(entries.front()); + entries.pop_front(); + oldest.reset(); + + entries.push_back(std::move(oldest)); +} + +int Member::restore(){ + //First clear out any snapshots that we have but the group doesn't. + auto begin = group.timestamps.begin(); + const auto end = group.timestamps.end(); + for(int entry = 0; entry < entries.size()-1; entry++){ + if(entries[entry].timestamp == -2) continue; + begin = std::lower_bound(begin, end, entries[entry].timestamp); + if(begin == end || *begin != entries[entry].timestamp) + entries[entry].reset(); + } + + //Now make sure snapshots align with group's timestamps + for(int snapshot = 1; snapshot <= group.timestamps.size(); snapshot++){ + int timestamp = group.timestamps[group.timestamps.size()-snapshot]; + int target = entries.size()-snapshot-1; + int actual; + for(actual = target; actual >= 0; actual--){ + if(entries[actual].timestamp == timestamp) break; + } + if(actual == target) continue; + if(actual != -1) { + std::swap(entries[actual], entries[target]); + } else { + int free = -1; + for(int i = 0; i <= target && free == -1; i++){ + if(entries[i].timestamp == -2) free = i; + } + assert(free != -1); + std::swap(entries[free], entries[target]); + } + } + + //Reset the current store buffer entry + entries.back().reset(); + return this->restore_impl(); +} + +int BuddyMember::restore_impl(){ + //My partner ranks (within set_comm) + const int rank = group.set_rank; + const int left = rank == 0 ? group.set_size-1 : rank-1; + const int right = rank == group.set_size-1 ? 
0 : rank+1; + + //Data on which partners have found each snapshot + int found[3]; + int& found_here = found[rank]; + int& found_left = found[left]; + int& found_right = found[right]; + + auto e = entries.rbegin()+1; + auto ts = group.timestamps.rbegin(); + for(; ts != group.timestamps.rend(); ts++, e++){ + fenix_assert(e->timestamp == -2 || e->timestamp == *ts); + + found_here = e->timestamp != -2; + MPI_Allgather( + MPI_IN_PLACE, 1, MPI_INT, found, 1, MPI_INT, group.set_comm + ); + + int n_missing = 0; + for(int i = 0; i < group.set_size; i++) if(!found[i]) n_missing++; + if(n_missing == 0) + continue; + else if(n_missing > 1){ + if(group.set_rank == 0) + debug_print( + "WARNING Fenix_Data_member_restore: %s member %d timestamp %d unrecoverable", + group.str().c_str(), id, *ts + ); + continue; + } + + if(!found_here){ + //Fetch my data region from right partner + recv_buf.recv_unknown(right, 0, group.set_comm); + e->add_and_fit({recv_buf}); + //Fetch my data + int m_count = e->region.count(e->elm_max_count-1); + recv_buf.recv(m_count*e->elm_size, right, 0, group.set_comm); + e->region.deserialize_data(e->elm_size, recv_buf, e->buf); + + //Fetch left partner's region + recv_buf.recv_unknown(left, 0, group.set_comm); + e->partner_add_and_fit({recv_buf}); + //Fetch data + int p_count = e->partner_region.count(e->elm_max_count-1); + recv_buf.recv(p_count*e->elm_size, left, 0, group.set_comm); + e->partner_region.deserialize_data( + e->elm_size, recv_buf, e->partner_buf + ); + + //Only update timestamp after all other data updated, to indicate + //recovery of this snapshot completed + e->timestamp = *ts; + } + if(!found_left){ + //Send partner's data region + e->partner_region.serialize(send_buf); + send_buf.send(left, 0, group.set_comm); + //Send their data + e->partner_region.serialize_data( + e->elm_size, e->partner_buf, send_buf + ); + send_buf.send(left, 0, group.set_comm); + } + if(!found_right){ + //Send my data region + e->region.serialize(send_buf); + send_buf.send(right, 0, group.set_comm); + //Send my data + e->region.serialize_data(e->elm_size, e->buf, send_buf); + send_buf.send(right, 0, group.set_comm); + } + } + return FENIX_SUCCESS; +} + +int ParityMember::restore_impl(){ + //Data on which partners have found each snapshot + std::vector found; + found.resize(group.set_size); + int found_here; + + auto e = entries.rbegin()+1; + auto ts = group.timestamps.rbegin(); + for(; ts != group.timestamps.rend(); ts++, e++){ + fenix_assert(e->timestamp == -2 || e->timestamp == *ts); + + found_here = e->timestamp != -2; + MPI_Allgather( + &found_here, 1, MPI_INT, found.data(), 1, MPI_INT, group.set_comm + ); + + int recovering = -1; + for(int i = 0; i < group.set_size; i++){ + if(found[i]) continue; + if(recovering != -1){ + if(group.set_rank == 0) + debug_print( + "WARNING Fenix_Data_member_restore: %s member %d timestamp %d unrecoverable", + group.str(), id, *ts + ); + recovering = -1; + break; + } else { + recovering = i; + } + } + if(recovering == -1) continue; + + int sender = recovering == 0 ? 1 : 0; + if(group.set_rank == sender){ + e->region.serialize(send_buf); + send_buf.send(recovering, 0, group.set_comm); + } else if(!found_here){ + recv_buf.recv_unknown(sender, 0, group.set_comm); + e->add_and_fit({recv_buf}); + } + + //Use the same logic as store, but recovering rank is always root and + //zeroes out the local data region before participating. 
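As background for the recovery loop that continues below: XOR parity allows any single missing block to be rebuilt from the surviving blocks plus the parity block, which is exactly what the MPI_BXOR reduction computes with the recovering rank as root. A minimal, MPI-free sketch of that property (all names illustrative):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  using Block = std::vector<unsigned char>;
  std::vector<Block> data = {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}};  // blocks held by 3 ranks

  // Parity block: byte-wise XOR of every data block (what MPI_BXOR computes).
  Block parity(3, 0);
  for (const Block& b : data)
    for (std::size_t i = 0; i < b.size(); i++) parity[i] ^= b[i];

  // Lose block 1, then rebuild it by XORing the parity with the survivors.
  Block rebuilt = parity;
  for (std::size_t r = 0; r < data.size(); r++) {
    if (r == 1) continue;  // the "failed" rank's block is unavailable
    for (std::size_t i = 0; i < rebuilt.size(); i++) rebuilt[i] ^= data[r][i];
  }
  assert(rebuilt == data[1]);
}
```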
+ int parity_size = e->size()/(group.set_size - 1); + int remainder = e->size()%(group.set_size - 1); + if(remainder) remainder++; + int m_parity_size = parity_size; + if(group.set_rankpartner_resize(m_parity_size); + + if(!found_here){ + std::memset(e->data(), 0, e->size()); + std::memset(e->partner_data(), 0, e->partner_size()); + } + + int offset = 0; + for(int i = 0; i < group.set_size; i++){ + int len = i < remainder ? parity_size + 1 : parity_size; + char* input; + if(group.set_rank == i){ + input = e->partner_data(); + } else { + if(offset+len > e->size()) offset--; + input = e->data()+offset; + offset += len; + } + MPI_Reduce( + MPI_IN_PLACE, input, len, MPI_BYTE, MPI_BXOR, recovering, + group.set_comm + ); + } + assert(offset == e->size()); + + e->timestamp = *ts; + } + + return FENIX_SUCCESS; +} + +int Member::lrestore( + char* target, int max_restore, int timestamp, DataSubset& recovered +){ + //Restoring always clears the commit buffer + entries.back().reset(); + + int end = 0; + if(timestamp == FENIX_TIME_STAMP_MAX){ + if(entries[entries.size()-2].timestamp >= 0){ + end = entries.size()-1; + } + } else { + for(int i = entries.size()-2; i >= 0; i--){ + if(entries[i].timestamp == timestamp){ + end = i+1; + break; + } + } + } + + int begin = end > 0 ? end-1 : end; + if(max_restore != 0){ + for(int i = end-1; i >= 0 && !recovered.includes_all(max_restore-1) ; i--){ + if(entries[i].timestamp < 0) break; + begin = i; + recovered += entries[i].region; + } + } else if(begin < end) { + recovered = entries[begin].region; + } + + for(int i = begin; i < end && target != NULL; i++){ + Entry& e = entries[i]; + e.region.copy_data(e.elm_size, e.buf, max_restore, target); + } + + if(end <= 0) return FENIX_ERROR_NODATA_FOUND; + if(max_restore != 0 && !recovered.includes_all(max_restore-1)) + return FENIX_WARNING_PARTIAL_RESTORE; + return FENIX_SUCCESS; +} + +Group::Group( + MPI_Comm m_comm, int timestart, int depth, int* policy, int* flag +){ + int* policy_vals = (int*)policy; + mode = policy_vals ? policy_vals[0] : 1; + rank_separation = policy_vals ? policy_vals[1] : __fenix_get_world_size(m_comm)/2; + + comm = m_comm; + + int my_rank, comm_size; + MPI_Comm_size(comm, &comm_size); + MPI_Comm_rank(comm, &my_rank); + current_rank = my_rank; + + std::set partner_set; + partner_set.insert(my_rank); + + if(mode == 1){ + //odd-sized groups take some extra handling. + bool isOdd = ((comm_size%2) != 0); + + int remaining_size = comm_size; + if(isOdd) remaining_size -= 3; + + //We want to form groups of rank_separation*2 to pair within + int n_full_groups = remaining_size / (rank_separation*2); + + //We don't always get what we want though, one group may need to be + //smaller. + int mini_group_size = + (remaining_size - n_full_groups*rank_separation*2)/2; + + int start_rank = mini_group_size + (isOdd?1:0); + int mid_rank = comm_size/2; //Only used when isOdd + + int end_mini_group_start = comm_size-mini_group_size-(isOdd?1:0); + int start_mini_group_start = (isOdd?1:0); + bool in_start_mini = + my_rank >= start_mini_group_start + && my_rank < start_mini_group_start+mini_group_size; + bool in_end_mini = + my_rank >= end_mini_group_start && my_rank < comm_size-(isOdd?1:0); + + //Allocate the "normal" ranks + if(my_rank >= start_rank && my_rank < end_mini_group_start + && (!isOdd || my_rank != mid_rank)){ + //"effective" rank for determining which group I'm in and if I look + //forward or backward for a partner. 
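The effective-rank computation that implements this continues below, including the odd-size and mini-group corrections. As a simplified reference, the core mode-1 pairing rule for the easy case (even communicator, ranks a fixed `rank_separation` apart, no mini groups) looks roughly like this; a minimal sketch with illustrative names, not the full handling above:

```cpp
#include <cstdio>

// Ranks fall into windows of rank_separation; even-numbered windows look
// forward for their partner, odd-numbered windows look backward, so every
// rank is paired with the rank exactly rank_separation away.
int buddy_partner(int rank, int rank_separation) {
  if ((rank / rank_separation) % 2 == 0) return rank + rank_separation;
  return rank - rank_separation;
}

int main() {
  const int comm_size = 8, rank_separation = 2;
  for (int r = 0; r < comm_size; r++)
    std::printf("rank %d <-> rank %d\n", r, buddy_partner(r, rank_separation));
}
```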
+ int e_rank = my_rank - start_rank; + if(isOdd && my_rank > mid_rank) + --e_rank; //We skip the middle rank when isOdd + + int my_partner; + if(((e_rank/rank_separation)%2) == 0){ + //Look forward for partner. + my_partner = my_rank + rank_separation; + if(isOdd && my_rank < mid_rank && my_partner >= mid_rank) + ++my_partner; + } else { + my_partner = my_rank - rank_separation; + if(isOdd && my_rank > mid_rank && my_partner <= mid_rank) + --my_partner; + } + + partner_set.insert(my_partner); + } else if(in_start_mini) { + int e_rank = my_rank - start_mini_group_start; + int partner = end_mini_group_start + e_rank; + partner_set.insert(partner); + } else if(in_end_mini) { + int e_rank = my_rank - end_mini_group_start; + int partner = start_mini_group_start + e_rank; + partner_set.insert(partner); + } else { + //Only things left are the three ranks that must be paired to handle + //odd-sized comms + partner_set.insert({0, mid_rank, comm_size-1}); + + //my_rank should be one of the inserted ranks, or something in the + //logic here is broken. + assert(partner_set.size() == 3 || (comm_size==1 && partner_set.size()==1)); + } + } else if(mode == 5){ + set_size = policy_vals[2]; + + //User is responsible for giving values that "make sense" for set size and rank separation given a comm size. + int my_set_pos = (my_rank/rank_separation)%set_size; + for(int index = 0; index < set_size; index++){ + int partner = (comm_size + my_rank - + rank_separation * (my_set_pos-index) + )%comm_size; + partner_set.insert(partner); + } + } + + partners = {partner_set.begin(), partner_set.end()}; + + //Make same MPI calls as reinit + reinit(flag); +} + +void Group::build_set_comm(){ + if(set_comm != MPI_COMM_NULL){ + MPI_Comm_free(&set_comm); + set_comm = MPI_COMM_NULL; + } + + MPI_Group comm_group, set_group; + MPI_Comm_group(comm, &comm_group); + MPI_Group_incl(comm_group, partners.size(), partners.data(), &set_group); + MPI_Comm_create_group(comm, set_group, 0, &(set_comm)); + + MPI_Group_free(&comm_group); + MPI_Group_free(&set_group); + + MPI_Comm_size(set_comm, &set_size); + MPI_Comm_rank(set_comm, &set_rank); +} + +Member* Group::find_member(int memberid){ + auto iter = member_data.find(memberid); + if(iter != member_data.end()) return iter->second.get(); + return nullptr; +} + +std::string Group::str(){ + std::stringstream ss; + ss << "Group " << groupid << " set "; + ss << "[" << partners[0]; + for(int i=1; imemberid, nullptr); + if(iter.second){ + if(mode == 1) + iter.first->second = std::make_shared(*mentry, *this); + else if(mode == 5) + iter.first->second = std::make_shared(*mentry, *this); + else assert(false); + + return FENIX_SUCCESS; + } else return FENIX_ERROR_MEMBER_CREATE; +} + +int Group::member_delete(int member_id){ + auto iter = member_data.find(member_id); + + if(iter == member_data.end()){ + debug_print("ERROR Fenix_Data_member_delete: member_id <%d> does not exist!\n", + member_id); + return FENIX_ERROR_INVALID_MEMBERID; + } + + member_data.erase(iter); + return FENIX_SUCCESS; +} + +int Group::member_store(int member_id, const DataSubset& subset){ + auto iter = member_data.find(member_id); + if(iter == member_data.end()){ + debug_print( + "ERROR Fenix_Data_member_store: %s unknown member_id %d on rank %d\n", + this->str().c_str(), member_id, current_rank + ); + return FENIX_ERROR_INVALID_MEMBERID; + } + return iter->second->store(subset); +} + +int Group::member_storev(int member_id, const DataSubset& subset){ + auto iter = member_data.find(member_id); + if(iter == member_data.end()){ + 
debug_print( + "ERROR Fenix_Data_member_storev: %s unknown member_id %d on rank %d\n", + this->str().c_str(), member_id, current_rank + ); + return FENIX_ERROR_INVALID_MEMBERID; + } + return iter->second->storev(subset); +} + +int Group::member_istore( + int member_id, const DataSubset& subset, Fenix_Request *request +) {return 0;} + +int Group::member_istorev( + int member_id, const DataSubset& subset_specifier, Fenix_Request *request +) {return 0;} + + +int Group::commit(){ + if(timestamps.size() == depth+1){ + //Full of timestamps, remove the oldest and proceed as normal. + timestamps.pop_front(); + } + timestamps.push_back(timestamp); + + for(auto& iter : member_data){ + iter.second->commit(timestamp); + } + + send_buf.clear(); + send_buf.shrink_to_fit(); + recv_buf.clear(); + recv_buf.shrink_to_fit(); + + return FENIX_SUCCESS; +} + + +int Group::snapshot_delete(int to_delete){ + int retval = FENIX_SUCCESS; + + bool found = false; + for(auto it = timestamps.begin(); it != timestamps.end(); it++){ + if(*it == to_delete){ + timestamps.erase(it); + found = true; + break; + } + } + for(auto& iter : member_data){ + found |= iter.second->snapshot_delete(to_delete); + } + return found ? FENIX_SUCCESS : FENIX_ERROR_INVALID_TIMESTAMP; +} + +int Group::barrier(){return 0;} + +int Group::get_number_of_snapshots(int* num){ + *num = timestamps.size(); + return FENIX_SUCCESS; +} + +int Group::get_snapshot_at_position(int idx, int* snapshot){ + if(idx >= timestamps.size() || idx < 0) + return FENIX_ERROR_INVALID_POSITION; + + *snapshot = timestamps[idx]; + return FENIX_SUCCESS; +} + +std::vector Group::get_snapshots(){ + return {timestamps.begin(), timestamps.end()}; +} + +int Group::member_restore( + int member_id, void* target_buffer, int max_count, int ts, + DataSubset& data_found +){ + //TODO: Is this fix needed anymore? + //One-time fix after a reinit. + if(timestamp == -1 && !timestamps.empty()) + timestamp = timestamps.back(); + + IMR::Member* member = find_member(member_id); + + std::vector found_members(set_size); + found_members[set_rank] = member ? 
1 : 0; + + int allgather_ret = MPI_Allgather( + MPI_IN_PLACE, 1, MPI_INT, found_members.data(), 1, MPI_INT, set_comm + ); + + int n_missing = 0; + int first_found = -1, missing_rank = -1; + for(int i = 0; i < found_members.size(); i++){ + if(!found_members[i]){ + n_missing++; + missing_rank = i; + } + if(found_members[i] && first_found == -1) + first_found = i; + } + + if(n_missing > 1){ + if(set_rank != 0) return FENIX_ERROR_INVALID_MEMBERID; + + if(n_missing == set_size){ + debug_print( + "ERROR Fenix_Data_member_restore: %s member_id %d not found\n", + this->str().c_str(), member_id + ); + } else { + debug_print( + "ERROR Fenix_Data_member_restore: %s member_id %d unrecoverable\n", + this->str().c_str(), member_id + ); + } + return FENIX_ERROR_INVALID_MEMBERID; + } else if(n_missing == 1){ + fenix_member_entry_packet_t packet; + if(set_rank == first_found) packet = member->mentry.to_packet(); + + MPI_Bcast( + &packet, sizeof(packet), MPI_BYTE, first_found, set_comm + ); + + if(!found_members[set_rank]){ + this->member_create(__fenix_data_member_add_entry( + this, packet.memberid, target_buffer, packet.current_count, + packet.datatype_size + )); + member = find_member(member_id); + assert(member); + } + } + + member->restore(); + + send_buf.clear(); + send_buf.shrink_to_fit(); + recv_buf.clear(); + recv_buf.shrink_to_fit(); + + return member->lrestore((char*)target_buffer, max_count, ts, data_found); +} + +int Group::member_lrestore( + int member_id, void* target_buffer, int max_count, int ts, + DataSubset& data_found +){ + auto iter = member_data.find(member_id); + if(iter == member_data.end()) return FENIX_ERROR_INVALID_MEMBERID; + return iter->second->lrestore((char*)target_buffer, max_count, ts, data_found); +} + + +int Group::member_restore_from_rank(int member_id, + void* target_buffer, int max_count, int timestamp, + int source_rank){return 0;} + + +int Group::member_get_attribute(fenix_member_entry_t* member, + int attributename, void* attributevalue, int* flag, int sourcerank){return 0;} + +int Group::member_set_attribute(fenix_member_entry_t* member, + int attributename, void* attributevalue, int* flag){ + //No mutable attributes (as of now) require any changes to this policy's info + return FENIX_SUCCESS; +} + +int Group::reinit(int* flag){ + build_set_comm(); + sync_timestamps(); + + *flag = FENIX_SUCCESS; + return *flag; +} + +void Group::sync_timestamps(){ + int n_snapshots = timestamps.size(); + + MPI_Allreduce(MPI_IN_PLACE, &n_snapshots, 1, MPI_INT, MPI_MAX, set_comm); + + bool need_reset = timestamps.size() != n_snapshots; + for(int i = timestamps.size(); i < n_snapshots; i++) timestamps.push_front(-1); + + std::vector ts = {timestamps.begin(), timestamps.end()}; + MPI_Allreduce( + MPI_IN_PLACE, ts.data(), n_snapshots, MPI_INT, MPI_MAX, set_comm + ); + timestamps = {ts.begin(), ts.end()}; + + if(!timestamps.empty()) timestamp = timestamps.back(); + else timestamp = -1; +} + +int Group::get_redundant_policy(int* policy_name, void* policy_value, int* flag){ + *policy_name = FENIX_DATA_POLICY_IN_MEMORY_RAID; + + int* policy_vals = (int*) policy_value; + policy_vals[0] = mode; + policy_vals[1] = rank_separation; + if(mode == 5) policy_vals[2] = set_size; + + *flag = FENIX_SUCCESS; + return *flag; +} + +int Group::group_delete(){ + delete this; + return FENIX_SUCCESS; +} +} // namespace Fenix::IMR diff --git a/src/fenix_data_recovery.c b/src/fenix_data_recovery.c deleted file mode 100644 index 0fba896..0000000 --- a/src/fenix_data_recovery.c +++ /dev/null @@ -1,1128 +0,0 @@ 
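Before the removed legacy C implementation below, a quick illustration of the timestamp-synchronization step in Group::sync_timestamps above: a recovered rank may know fewer commits than the survivors, so the shorter history is front-padded with -1 and merged element-wise with max. The real code does this merge with MPI_Allreduce over the set communicator; this MPI-free sketch (illustrative names, assumes the other history is the longer one) shows only the padding-and-max idea:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <deque>

std::deque<int> merge_timestamps(std::deque<int> mine, const std::deque<int>& theirs) {
  while (mine.size() < theirs.size()) mine.push_front(-1);   // pad missing history
  for (std::size_t i = 0; i < mine.size(); i++)
    mine[i] = std::max(mine[i], theirs[i]);                  // MPI_MAX equivalent
  return mine;
}

int main() {
  std::deque<int> survivor  = {3, 4, 5};  // knows three commits
  std::deque<int> recovered = {5};        // only restored the latest one
  assert(merge_timestamps(recovered, survivor) == survivor);
}
```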
-/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| _| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Michael Heroux, and Matthew Whitlock -// -// Questions? Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ - - - -#include "fenix_data_recovery.h" -#include "fenix_data_policy.h" -#include "fenix_opt.h" -//#include "fenix_process_recovery.h" -#include "fenix_util.h" -#include "fenix_ext.h" - -#include - -/** - * @brief create new group or recover group data for lost processes - * @param groud_id - * @param comm - * @param time_start - * @param depth - */ -int __fenix_group_create( int groupid, MPI_Comm comm, int timestart, int depth, int policy_name, - void* policy_value, int* flag) { - - int retval = -1; - - /* Retrieve the array index of the group maintained under the cover. 
*/ - int group_index = __fenix_search_groupid( groupid, fenix.data_recovery ); - - if (fenix.options.verbose == 12) { - - verbose_print("c-rank: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), group_index); - } - - - /* Check the integrity of user's input */ - if ( timestart < 0 ) { - - debug_print("ERROR Fenix_Data_group_create: time_stamp <%d> must be greater than or equal to zero\n", timestart); - retval = FENIX_ERROR_INVALID_TIMESTAMP; - - } else if ( depth < -1 ) { - - debug_print("ERROR Fenix_Data_group_create: depth <%d> must be greater than or equal to -1\n",depth); - retval = FENIX_ERROR_INVALID_DEPTH; - - } else { - - /* This code block checks the need for data recovery. */ - /* If so, recover the data and set the recovery */ - /* for member recovery. */ - - int i; - int remote_need_recovery; - fenix_group_t *group; - MPI_Status status; - - fenix_data_recovery_t *data_recovery = fenix.data_recovery; - - /* Initialize Group. The group hasn't been created. */ - /* I am either a brand-new process or a recovered process. */ - if (group_index == -1 ) { - - if (fenix.options.verbose == 12 && __fenix_get_current_rank(comm) == 0) { - printf("this is a new group!\n"); - } - - /* Obtain an available group slot */ - group_index = __fenix_find_next_group_position(data_recovery); - - /* Initialize Group */ - __fenix_policy_get_group(data_recovery->group + group_index, comm, timestart, - depth, policy_name, policy_value, flag); - - //The group has filled any group-specific details, we need to fill in the core details. - group = (data_recovery->group[ group_index ] ); - group->groupid = groupid; - group->timestart = timestart; - group->timestamp = -1; //indicates no commits yet - group->depth = depth; - group->member = __fenix_data_member_init(); - group->comm = comm; - MPI_Comm_rank(comm, &(group->current_rank)); - - - //Update the count AFTER finding next group position. - data_recovery->count++; - - if ( fenix.options.verbose == 12) { - verbose_print( - "c-rank: %d, g-groupid: %d, g-timestart: %d, g-depth: %d\n", - __fenix_get_current_rank(fenix.new_world), group->groupid, - group->timestart, - group->depth); - } - - } else { /* Already created. Renew the MPI communicator */ - - group = ( data_recovery->group[group_index] ); - group->comm = comm; /* Renew communicator */ - MPI_Comm_rank(comm, &(group->current_rank)); - - - //Reinit group metadata as needed w/ new communicator. 
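For orientation, this removed C implementation and its C++ replacement later in the patch expose the same user-facing lifecycle: create a group on a communicator, register members, store, and commit. A hedged usage sketch follows; the Fenix_Data_group_create argument order is assumed to mirror the internal __fenix_group_create above, and the group id, member id, and policy values are illustrative only:

```cpp
#include <fenix.h>
#include <mpi.h>

void checkpoint_loop(MPI_Comm resilient_comm, double* state, int count) {
  int flag;
  int policy_value[2] = {1, 2};  // assumed: mode 1 (buddy), rank separation 2

  Fenix_Data_group_create(7, resilient_comm, 0 /*timestart*/, 2 /*depth*/,
                          FENIX_DATA_POLICY_IN_MEMORY_RAID, policy_value, &flag);
  Fenix_Data_member_create(7, 0, state, count, MPI_DOUBLE);

  for (int step = 0; step < 100; step++) {
    // ... compute on state ...
    Fenix_Data_member_store(7, 0, FENIX_DATA_SUBSET_FULL);
    Fenix_Data_commit_barrier(7, NULL);
  }
}
```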
- group->vtbl.reinit(group, flag); - } - - - /* Global agreement among the group */ - retval = FENIX_SUCCESS; - } - return retval; -} - -int __fenix_group_get_redundancy_policy(int groupid, int* policy_name, int* policy_value, int* flag){ - int retval = -1; - int group_index = __fenix_search_groupid( groupid, fenix.data_recovery ); - - if(group_index == -1){ - debug_print("ERROR Fenix_Data_member_create: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t* group = fenix.data_recovery->group[group_index]; - retval = group->vtbl.get_redundant_policy(group, policy_name, policy_value, flag); - } - - return retval; -} - - -/** - * @brief - * @param group_id - * @param member_id - * @param data - * @param count - * @param data_type - */ -int __fenix_member_create(int groupid, int memberid, void *data, int count, int datatype_size ) { - int retval = -1; - int group_index = __fenix_search_groupid( groupid, fenix.data_recovery ); - int member_index = -1; - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid ); - - if (fenix.options.verbose == 13) { - verbose_print("c-rank: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(fenix.new_world), - group_index, member_index); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_create: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index != -1) { - debug_print("ERROR Fenix_Data_member_create: member_id <%d> already exists\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - - } else { - - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - - //First, we'll make a fenix-core member entry, then pass that info to - //the specific data policy. - fenix_member_entry_t* mentry; - mentry = __fenix_data_member_add_entry(member, memberid, data, count, datatype_size); - - //Pass the info along to the policy - retval = group->vtbl.member_create(group, mentry); - - } - return retval; - /* No Potential Bug in 2/10/17 */ -} - - -/** - * @brief - * @param request - */ -int __fenix_data_wait( Fenix_Request request ) { - int retval = -1; - int result = __fenix_mpi_wait(&(request.mpi_recv_req)); - - if (result != MPI_SUCCESS) { - retval = FENIX_SUCCESS; - } else { - retval = FENIX_ERROR_DATA_WAIT; - } - - result = __fenix_mpi_wait(&(request.mpi_send_req)); - - if (result != MPI_SUCCESS) { - retval = FENIX_SUCCESS; - } else { - retval = FENIX_ERROR_DATA_WAIT; - } - - return retval; -} - -/** - * @brief - * @param request - * @param flag - */ -int __fenix_data_test(Fenix_Request request, int *flag) { - int retval = -1; - int result = ( __fenix_mpi_test(&(request.mpi_recv_req)) & __fenix_mpi_test(&(request.mpi_send_req))) ; - - if ( result == 1 ) { - *flag = 1; - retval = FENIX_SUCCESS; - } else { - *flag = 0 ; // incomplete error? - retval = FENIX_ERROR_DATA_WAIT; - } - return retval; - /* Good 2/10/17 */ -} - -/** - * @brief // TODO: implement FENIX_DATA_MEMBER_ALL - * @param group_id - * @param member_id - * @param subset_specifier - * - */ - -int __fenix_member_store(int groupid, int memberid, Fenix_Data_subset specifier) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - - /* Check if the member id already exists. 
If so, the index of the storage space is assigned */ - if (group_index !=-1 && memberid != FENIX_DATA_MEMBER_ALL) { - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid ); - } - - if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { - verbose_print( - "c-rank: %d, role: %d, group_index: %d, member_index: %d memberid: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index, memberid); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_store: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_store: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_store(group, memberid, specifier); - } - return retval; -} - -/** - * @brief - * @param group_id - * @param member_id - * @param subset_specifier - * @param request - */ -int __fenix_member_istore(int groupid, int memberid, Fenix_Data_subset specifier, - Fenix_Request *request) { - - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - - /* Check if the member id already exists. If so, the index of the storage space is assigned */ - if (group_index !=-1 && memberid != FENIX_DATA_MEMBER_ALL) { - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid ); - } - - if (fenix.options.verbose == 18 && fenix.data_recovery->group[group_index]->current_rank== 0 ) { - verbose_print( - "c-rank: %d, role: %d, group_index: %d, member_index: %d memberid: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index, memberid); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_store: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_store: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_istore(group, memberid, specifier, request); - } - return retval; -} - - - -void __fenix_subset(fenix_group_t *group, fenix_member_entry_t *me, Fenix_Data_subset *ss) { -#if 1 - fprintf(stderr,"ERROR Fenix_Subset is not currently supported\n"); - -#else - fenix_version_t *version = &(me->version); - fenix_local_entry_t *lentry = &(version->local_entry[version->position]); - fenix_remote_entry_t *rentry = &(version->remote_entry[version->position]); - - int i; - MPI_Status status; - - /* Store the local data */ - /* This version does not apply any storage saving scheme */ - memcpy(lentry->data, lentry->pdata, (lentry->count * lentry->size)); - - /* Check the subset */ - int subset_total_size = 0; - for( i = 0; i < ss->num_blocks; i++ ) { - int subset_start = ss->start_offset[i]; - int subset_end = ss->start_offset[i]; - int subset_stride = ss->start_offset[i]; - - } - subset_total_size = ss->num_blocks * ss->fblk_size; - - /* Create a buffer for sending data (lentry->size is a size of single element ) */ - void *subset_data = (void *) s_malloc(me->datatype_size * subset_total_size ); - - - /* This data exchange is not necessary when using non-v call */ - member_store_packet_t 
lentry_packet, rentry_packet; - lentry_packet.rank = lentry->currentrank; - lentry_packet.datatype = lentry->datatype; - lentry_packet.entry_count = lentry->count; - lentry_packet.entry_size = subset_total_size; - - int current_rank = __fenix_get_current_rank(fenix.new_world); - int current_role = fenix.role; - - MPI_Sendrecv(&lentry_packet, sizeof(member_store_packet_t), MPI_BYTE, ge->out_rank, - STORE_SIZE_TAG, &rentry_packet, sizeof(member_store_packet_t), MPI_BYTE, - ge->in_rank, STORE_SIZE_TAG, (ge->comm), &status); - - rentry->remoterank = rentry_packet.rank; - rentry->datatype = rentry_packet.datatype; - rentry->count = rentry_packet.entry_count; - rentry->size = rentry_packet.entry_size; - - if (rentry->data != NULL) { - rentry->data = s_malloc(rentry->count * rentry->size); - } - - /* Partner is sending subset */ - if( rentry->size != rentry->count ) { - /* Receive # of blocks */ - - } - /* Handle Subset */ - int subset_num_blocks = ss->num_blocks; - int subset_start = ss->start_offsets[0]; - int subset_end = ss->end_offsets[0]; - int subset_stride = ss->stride; - int subset_diff = subset_end - subset_start; - int subset_count = subset_num_blocks * subset_diff; - - int subset_block = 0; - int subset_index = 0; - void *subset_data = (void *) s_malloc(sizeof(void) * me->current_count); - - int data_index; - int data_steps = 0; - int data_count = me->current_count; - for (data_index = subset_start; (subset_block != subset_num_blocks - 1) && - (data_index < data_count); data_index++) { - if (data_steps != subset_diff) { - MPI_Sendrecv((lentry->data) + data_index, (1 * lentry->size), MPI_BYTE, ge->out_rank, - STORE_DATA_TAG, (rentry->data) + data_index, (1 * rentry->size), MPI_BYTE, - ge->in_rank, STORE_DATA_TAG, ge->comm, &status); - // memcpy((subset_data) + data_index, (me->current_buf) + data_index, sizeof(me->current_datatype)); - data_steps = data_steps + 1; - } else if (data_steps == subset_diff) { - data_steps = 0; - subset_block = subset_block + 1; - data_index = data_index + subset_stride - 1; - } - } - - /* Need to update the version info */ - if (version->position < version->size - 1) { - version->num_copies++; - version->position++; - } else { /* Back to 0 */ - version->position = 0; - } -#endif -} - - -#if 0 -/** - * @brief - * @param group_id - * @param member_id - * @param subset_specifier - */ -int __fenix_member_storev(int group_id, int member_id, Fenix_Data_subset subset_specifier) { - -/* - * Using the same routine for v and non-v routine. 
- */ - int retval = -1; - int group_index = __fenix_search_groupid( group_id, fenix.data_recovery ); - int member_index = __fenix_search_memberid(group_index, member_id); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_storev: group_id <%d> does not exist\n", - group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_storev: member_id <%d> does not exist\n", - member_id); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = fenix.data_recovery; - fenix_group_entry_t *gentry = &(group->group_entry[group_index]); - fenix_member_t *member = &(gentry->member); - __fenix_ensure_version_capacity(member); - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - fenix_version_t *version = &(mentry->version); - fenix_local_entry_t *lentry = &(version->local_entry[version->position]); - fenix_remote_entry_t *rentry = &(version->remote_entry[version->position]); - retval = FENIX_SUCCESS; - } - return retval; - -} -#endif - -#if 0 -/** - * @brief - * @param group_id - * @param member_id - * @param subset_specifier - * @param request - */ -int __fenix_member_istorev(int group_id, int member_id, Fenix_Data_subset subset_specifier, - Fenix_Request *request) { - - int retval = -1; - int group_index = __fenix_search_groupid(group_id, __fenixi_g_data_recovery ); - int member_index = __fenix_search_memberid(group_index, member_id); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_istorev: group_id <%d> does not exist\n", - group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_istorev: member_id <%d> does not exist\n", - member_id); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = fenix.data_recovery; - fenix_group_entry_t *gentry = &(group->group_entry[group_index]); - fenix_member_t *member = &(gentry->member); - __fenix_ensure_version_capacity(member); - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - fenix_version_t *version = &(mentry->version); - fenix_local_entry_t *lentry = &(version->local_entry[version->position]); - fenix_remote_entry_t *rentry = &(version->remote_entry[version->position]); - retval = FENIX_SUCCESS; - } - - return retval; -} -#endif - -/** - * @brief - * @param group_id - * @param time_stamp - */ -int __fenix_data_commit(int groupid, int *timestamp) { - /* No communication is performed */ - /* Return the new timestamp */ - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - if (fenix.options.verbose == 22) { - verbose_print("c-rank: %d, role: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); - } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - - if (group->timestamp != -1) group->timestamp++; - else group->timestamp = group->timestart; - - group->vtbl.commit(group); - - if (timestamp != NULL) { - *timestamp = group->timestamp; - } - - retval = FENIX_SUCCESS; - } - return retval; -} - -/** - * @brief - * @param group_id - * @param time_stamp - */ -int __fenix_data_commit_barrier(int groupid, int *timestamp) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - if (fenix.options.verbose == 23) { - 
verbose_print("c-rank: %d, role: %d, group_index: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); - } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - - //We want to make sure there aren't any failed MPI operations (IE unfinished stores) - //But we don't want to fail to commit if a failure has happened since a successful store. - int old_failure_handling = fenix.ignore_errs; - fenix.ignore_errs = 1; - - int can_commit = 0; - - //We'll use comm_agree as a resilient barrier - //Our error handler also enters an agree, with a unique location bit set. - //So if we aren't all here, we've hit an error already. - - int location = FENIX_DATA_COMMIT_BARRIER_LOC; - int ret = MPIX_Comm_agree(*fenix.user_world, &location); - if(location == FENIX_DATA_COMMIT_BARRIER_LOC) can_commit = 1; - - fenix.ignore_errs = old_failure_handling; - - if(can_commit == 1){ - if (group->timestamp != -1) group->timestamp++; - else group->timestamp = group->timestart; - retval = group->vtbl.commit(group); - } - - - if(can_commit != 1 || ret != MPI_SUCCESS) { - //A rank failure has happened, lets trigger error handling if enabled. - int throwaway = 1; - MPI_Allreduce(MPI_IN_PLACE, &throwaway, 1, MPI_INT, MPI_SUM, *fenix.user_world); - } - - - if (timestamp != NULL) { - *timestamp = group->timestamp; - } - } - return retval; -} - - -/** - * @brief - * @param group_id - * @param member_id - * @param data - * @param max_count - * @param time_stamp - */ -int __fenix_member_restore(int groupid, int memberid, void *data, int maxcount, int timestamp, Fenix_Data_subset* data_found) { - - int retval = FENIX_SUCCESS; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); - int member_index = -1; - - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - - - if (fenix.options.verbose == 25) { - verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_restore: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_restore(group, memberid, data, maxcount, timestamp, data_found); - } - return retval; -} - -/** - * @brief - * @param group_id - * @param member_id - * @param data - * @param max_count - * @param time_stamp - */ -int __fenix_member_lrestore(int groupid, int memberid, void *data, int maxcount, int timestamp, Fenix_Data_subset* data_found) { - - int retval = FENIX_SUCCESS; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); - int member_index = -1; - - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - - - if (fenix.options.verbose == 25) { - verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_lrestore: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = 
(fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_lrestore(group, memberid, data, maxcount, timestamp, data_found); - } - return retval; -} - -/** - * @brief - * @param group_id - * @param member_id - * @param target_buffer - * @param max_count - * @param time_stamp - * @param source_rank - */ -int __fenix_member_restore_from_rank(int groupid, int memberid, void *target_buffer, - int max_count, int time_stamp, int source_rank) { - int retval = FENIX_SUCCESS; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); - int member_index = -1; - - if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - - if (fenix.options.verbose == 25) { - verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_restore: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.member_restore_from_rank(group, memberid, target_buffer, - max_count, time_stamp, source_rank); - } - return retval; -} - - - -/** - * @brief - * @param group_id - * @param num_members - */ -int __fenix_get_number_of_members(int group_id, int *num_members) { - int retval = -1; - int group_index = __fenix_search_groupid(group_id, fenix.data_recovery ); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - *num_members = group->member->count; - retval = FENIX_SUCCESS; - } - return retval; -} - -/** - * @brief - * @param group_id - * @param member_id - * @param position - */ -int __fenix_get_member_at_position(int group_id, int *member_id, int position) { - int retval = -1; - int group_index = __fenix_search_groupid(group_id, fenix.data_recovery); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - if (position < 0 || position > (member->total_size) - 1) { - debug_print( - "ERROR Fenix_Data_group_get_member_at_position: position <%d> must be a value between 0 and number_of_members-1 \n", - position); - retval = FENIX_ERROR_INVALID_POSITION; - } else { - int member_index = ((member->total_size) - 1) - position; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - *member_id = mentry->memberid; - retval = FENIX_SUCCESS; - } - } - return retval; -} - -/** - * @brief - * @param group_id - * @param num_snapshots - */ -int __fenix_get_number_of_snapshots(int group_id, int *num_snapshots) { - int retval = -1; - int group_index = __fenix_search_groupid(group_id, fenix.data_recovery ); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.get_number_of_snapshots(group, num_snapshots); - } - return retval; -} - -/** - * @brief - * @param group_id - * @param position - * @param time_stamp - */ -int __fenix_get_snapshot_at_position(int 
groupid, int position, int *timestamp) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - if (fenix.options.verbose == 33) { - verbose_print("c-rank: %d, role: %d, group_index: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); - } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - *timestamp = group->timestamp - position; - } - return retval; -} - -/** - * @brief - * @param group_id - * @param member_id - * @param attribute_name - * @param attribute_value - * @param flag - * @param source_rank - */ -int __fenix_member_get_attribute(int groupid, int memberid, int attributename, - void *attributevalue, int *flag, int sourcerank) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - - if(group_index != -1){ - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - } - - if (fenix.options.verbose == 34) { - verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index); - } - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_get: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_get: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - - int retval = group->vtbl.member_get_attribute(group, mentry, attributename, - attributevalue, flag, sourcerank); - - } - return retval; -} - -/** - * @brief - * @param group_id - * @param member_id - * @param attribute_name - * @param attribute_value - * @param flag - */ -int __fenix_member_set_attribute(int groupid, int memberid, int attributename, - void *attributevalue, int *flag) { - int retval = -1; - int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); - int member_index = -1; - - if(group_index != -1){ - member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index]->member, memberid); - } - - if (fenix.options.verbose == 35) { - verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", - __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, - member_index); - } - - if (group_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_set: group_id <%d> does not exist\n", - groupid); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (member_index == -1) { - debug_print("ERROR Fenix_Data_member_attr_set: member_id <%d> does not exist\n", - memberid); - retval = FENIX_ERROR_INVALID_MEMBERID; - } else { - int my_datatype_size; - int myerr; - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - fenix_member_t *member = group->member; - fenix_member_entry_t *mentry = &(member->member_entry[member_index]); - - //Always pass attribute changes along to group - they might have unknown attributes - //or side-effects to handle from changes. They get change info before - //changes are made, in case they need prior state. 
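From the application side, the most common use of this attribute path (dispatched to the policy just below) is re-pointing a member at a reallocated buffer after recovery. A small hedged sketch, using the attr_set call as it appears in the examples; the group and member ids are illustrative:

```cpp
#include <fenix.h>
#include <vector>

// After a restart the user buffer may live at a new address, so the member
// is re-pointed at it before the next store.
void repoint_after_recovery(std::vector<double>& state) {
  int flag;
  Fenix_Data_member_attr_set(7, 0, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER,
                             state.data(), &flag);
}
```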
- retval = group->vtbl.member_set_attribute(group, mentry, attributename, - attributevalue, flag); - - switch (attributename) { - case FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER: - mentry->user_data = attributevalue; - break; - case FENIX_DATA_MEMBER_ATTRIBUTE_COUNT: - mentry->current_count = *((int *) (attributevalue)); - retval = FENIX_SUCCESS; - break; - case FENIX_DATA_MEMBER_ATTRIBUTE_DATATYPE: - - myerr = MPI_Type_size(*((MPI_Datatype *)(attributevalue)), &my_datatype_size); - - if( myerr ) { - debug_print( - "ERROR Fenix_Data_member_attr_get: Fenix currently does not support this MPI_DATATYPE; error %d\n", myerr - ); - retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; - } - - mentry->datatype_size = my_datatype_size; - retval = FENIX_SUCCESS; - break; - - default: - //Only an issue if the policy also doesn't have this attribute. - if(retval){ - debug_print("ERROR Fenix_Data_member_attr_get: invalid attribute_name <%d>\n", - attributename); - retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; - } - break; - } - } - return retval; -} - -/** - * @brief - * @param group_id - * @param time_stamp - */ -int __fenix_snapshot_delete(int group_id, int time_stamp) { - int retval = -1; - int group_index = __fenix_search_groupid(group_id, fenix.data_recovery ); - if (group_index == -1) { - debug_print("ERROR Fenix_Data_snapshot_delete: group_id <%d> does not exist\n", - group_id); - retval = FENIX_ERROR_INVALID_GROUPID; - } else if (time_stamp < 0) { - debug_print( - "ERROR Fenix_Data_snapshot_delete: time_stamp <%d> must be greater than zero\n", - time_stamp); - retval = FENIX_ERROR_INVALID_TIMESTAMP; - } else { - fenix_group_t *group = (fenix.data_recovery->group[group_index]); - retval = group->vtbl.snapshot_delete(group, time_stamp); - } - return retval; -} - -///////////////////////////////////////////////////// TODO // - -void __fenix_store_single() { - - -} - -#if 0 //This needs to be reworked for the new data redundancy framework. - //Lots of info about member versions etc. has been moved to policy-specific - //data. 
-/** - * - */ -void __feninx_dr_print_store() { - int group, member, version, local, remote; - fenix_data_recovery_t *current = fenix.data_recovery; - int group_count = current->count; - for (group = 0; group < group_count; group++) { - int member_count = current->group[group]->member->count; - for (member = 0; member < member_count; member++) { - int version_count = current->group[group]->member->member_entry[member].version->count; - for (version = 0; version < version_count; version++) { - int local_data_count = current->group[group]->member->member_entry[member].version->local_entry[version].count; - int *local_data = current->group[group]->member->member_entry[member].version->local_entry[version].data; - for (local = 0; local < local_data_count; local++) { - //printf("*** store rank[%d] group[%d] member[%d] local[%d]: %d\n", - //get_current_rank(fenix.new_world), group, member, local, - //local_data[local]); - } - int remote_data_count = current->group[group]->member->member_entry[member].version->remote_entry[version].count; - int *remote_data = current->group[group]->member->member_entry[member].version->remote_entry[version].data; - for (remote = 0; remote < remote_data_count; remote++) { - printf("*** store rank[%d] group[%d] member[%d] remote[%d]: %d\n", - __fenix_get_current_rank(fenix.new_world), group, member, remote, - remote_data[remote]); - } - } - } - } -} - -/** - * - */ - -void __fenix_dr_print_restore() { - fenix_data_recovery_t *current = fenix.data_recovery; - int group_count = current->count; - int member_count = current->group[0]->member->count; - int version_count = current->group[0]->member->member_entry[0].version->count; - int local_data_count = current->group[0]->member->member_entry[0].version->local_entry[0].count; - int remote_data_count = current->group[0]->member->member_entry[0].version->remote_entry[0].count; - printf("*** restore rank: %d; group: %d; member: %d; local: %d; remote: %d\n", - __fenix_get_current_rank(fenix.new_world), group_count, member_count, - local_data_count, - remote_data_count); -} - -/** - * - */ -void __fenix_dr_print_datastructure() { - int group_index, member_index, version_index, remote_data_index, local_data_index; - fenix_data_recovery_t *current = fenix.data_recovery; - - if (!current) { - return; - } - - printf("\n\ncurrent_rank: %d\n", __fenix_get_current_rank(fenix.new_world)); - int group_size = current->total_size; - for (group_index = 0; group_index < group_size; group_index++) { - int depth = current->group[group_index]->depth; - int groupid = current->group[group_index]->groupid; - int timestamp = current->group[group_index]->timestamp; - int group_state = current->group[group_index]->state; - int member_size = current->group[group_index]->member->total_size; - int member_count = current->group[group_index]->member->count; - switch (group_state) { - case EMPTY: - printf("group[%d] depth: %d groupid: %d timestamp: %d state: %s member.size: %d member.count: %d\n", - group_index, depth, groupid, timestamp, "EMPTY", member_size, - member_count); - break; - case OCCUPIED: - printf("group[%d] depth: %d groupid: %d timestamp: %d state: %s member.size: %d member.count: %d\n", - group_index, depth, groupid, timestamp, "OCCUPIED", member_size, - member_count); - break; - case DELETED: - printf("group[%d] depth: %d groupid: %d timestamp: %d state: %s member.size: %d member.count: %d\n", - group_index, depth, groupid, timestamp, "DELETED", member_size, - member_count); - break; - default: - break; - } - - for (member_index = 0; 
member_index < member_size; member_index++) { - int memberid = current->group[group_index]->member->member_entry[member_index].memberid; - int member_state = current->group[group_index]->member->member_entry[member_index].state; - int version_size = current->group[group_index]->member->member_entry[member_index].version->total_size; - int version_count = current->group[group_index]->member->member_entry[member_index].version->count; - switch (member_state) { - case EMPTY: - printf("group[%d] member[%d] memberid: %d state: %s depth.size: %d depth.count: %d\n", - group_index, member_index, memberid, "EMPTY", version_size, - version_count); - break; - case OCCUPIED: - printf("group[%d] member[%d] memberid: %d state: %s depth.size: %d depth.count: %d\n", - group_index, member_index, memberid, "OCCUPIED", version_size, - version_count); - break; - case DELETED: - printf("group[%d] member[%d] memberid: %d state: %s depth.size: %d depth.count: %d\n", - group_index, member_index, memberid, "DELETED", version_size, - version_count); - break; - default: - break; - } - - for (version_index = 0; version_index < version_size; version_index++) { - int local_data_count = current->group[group_index]->member->member_entry[member_index].version->local_entry[version_index].count; - printf("group[%d] member[%d] version[%d] local_data.count: %d\n", - group_index, - member_index, - version_index, local_data_count); - if (current->group[group_index]->member->member_entry[member_index].version->local_entry[version_index].data != - NULL) { - int *current_local_data = (int *) current->group[group_index]->member->member_entry[member_index].version->local_entry[version_index].data; - for (local_data_index = 0; local_data_index < local_data_count; local_data_index++) { - printf("group[%d] member[%d] depth[%d] local_data[%d]: %d\n", - group_index, - member_index, - version_index, local_data_index, - current_local_data[local_data_index]); - } - } - - int remote_data_count = current->group[group_index]->member->member_entry[member_index].version->remote_entry[version_index].count; - printf("group[%d] member[%d] version[%d] remote_data.count: %d\n", - group_index, - member_index, version_index, remote_data_count); - if (current->group[group_index]->member->member_entry[member_index].version->remote_entry[version_index].data != - NULL) { - int *current_remote_data = current->group[group_index]->member->member_entry[member_index].version->remote_entry[version_index].data; - for (remote_data_index = 0; remote_data_index < remote_data_count; remote_data_index++) { - printf("group[%d] member[%d] depth[%d] remote_data[%d]: %d\n", - group_index, - member_index, version_index, remote_data_index, - current_remote_data[remote_data_index]); - } - } - } - } - } -} -#endif diff --git a/src/fenix_data_recovery.cpp b/src/fenix_data_recovery.cpp new file mode 100644 index 0000000..1ef833b --- /dev/null +++ b/src/fenix_data_recovery.cpp @@ -0,0 +1,730 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + + + +#include "fenix.hpp" +#include "fenix_data_recovery.hpp" +#include "fenix_data_policy.hpp" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" +#include "fenix_ext.hpp" +#include "fenix_data_subset.hpp" + +#include + +namespace Fenix::Data { + +/** + * @brief create new group or recover group data for lost processes + * @param groud_id + * @param comm + * @param time_start + * @param depth + */ +int group_create( int groupid, MPI_Comm comm, int timestart, int depth, int policy_name, + void* policy_value, int* flag) { + + int retval = -1; + + /* Retrieve the array index of the group maintained under the cover. */ + int group_index = __fenix_search_groupid( groupid, fenix.data_recovery ); + + if (fenix.options.verbose == 12) { + + verbose_print("c-rank: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), group_index); + } + + + /* Check the integrity of user's input */ + if ( timestart < 0 ) { + + debug_print("ERROR Fenix_Data_group_create: time_stamp <%d> must be greater than or equal to zero\n", timestart); + retval = FENIX_ERROR_INVALID_TIMESTAMP; + + } else if ( depth < -1 ) { + + debug_print("ERROR Fenix_Data_group_create: depth <%d> must be greater than or equal to -1\n",depth); + retval = FENIX_ERROR_INVALID_DEPTH; + + } else { + + /* This code block checks the need for data recovery. */ + /* If so, recover the data and set the recovery */ + /* for member recovery. */ + + int i; + int remote_need_recovery; + fenix_group_t *group; + MPI_Status status; + + fenix_data_recovery_t *data_recovery = fenix.data_recovery; + + /* Initialize Group. The group hasn't been created. */ + /* I am either a brand-new process or a recovered process. 
*/ + if (group_index == -1 ) { + + if (fenix.options.verbose == 12 && __fenix_get_current_rank(comm) == 0) { + printf("this is a new group!\n"); + } + + /* Obtain an available group slot */ + group_index = __fenix_find_next_group_position(data_recovery); + + /* Initialize Group */ + __fenix_policy_get_group(data_recovery->group + group_index, comm, timestart, + depth, policy_name, policy_value, flag); + + //The group has filled any group-specific details, we need to fill in the core details. + group = (data_recovery->group[ group_index ] ); + group->groupid = groupid; + group->timestart = timestart; + group->timestamp = -1; //indicates no commits yet + group->depth = depth; + group->comm = comm; + MPI_Comm_rank(comm, &(group->current_rank)); + + + //Update the count AFTER finding next group position. + data_recovery->count++; + + if ( fenix.options.verbose == 12) { + verbose_print( + "c-rank: %d, g-groupid: %d, g-timestart: %d, g-depth: %d\n", + __fenix_get_current_rank(fenix.new_world), group->groupid, + group->timestart, + group->depth); + } + + } else { /* Already created. Renew the MPI communicator */ + + group = ( data_recovery->group[group_index] ); + group->comm = comm; /* Renew communicator */ + MPI_Comm_rank(comm, &(group->current_rank)); + + + //Reinit group metadata as needed w/ new communicator. + group->reinit(flag); + } + + + /* Global agreement among the group */ + retval = FENIX_SUCCESS; + } + return retval; +} + +int __fenix_group_get_redundancy_policy(int groupid, int* policy_name, int* policy_value, int* flag){ + int retval = -1; + int group_index = __fenix_search_groupid( groupid, fenix.data_recovery ); + + if(group_index == -1){ + debug_print("ERROR Fenix_Data_member_create: group_id <%d> does not exist\n", + groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t* group = fenix.data_recovery->group[group_index]; + retval = group->get_redundant_policy(policy_name, policy_value, flag); + } + + return retval; +} + + +/** + * @brief + * @param group_id + * @param member_id + * @param data + * @param count + * @param data_type + */ +int member_create( + int groupid, int memberid, void *data, int count, MPI_Datatype datatype +) { + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + auto [member_index, mentry] = group->search_member(memberid); + if(mentry){ + debug_print("ERROR Fenix_Data_member_create: member_id <%d> already exists\n", + memberid); + return FENIX_ERROR_INVALID_MEMBERID; + } + + if (fenix.options.verbose == 13) { + verbose_print("c-rank: %d, group_index: %d, member_index: %d\n", + __fenix_get_current_rank(fenix.new_world), + group_index, member_index); + } + + //First, we'll make a fenix-core member entry, then pass that info to + //the specific data policy. 
+ mentry = __fenix_data_member_add_entry(group, memberid, data, count, __fenix_get_size(datatype)); + + //Pass the info along to the policy + return group->member_create(mentry); +} + + +/** + * @brief + * @param request + */ +int __fenix_data_wait( Fenix_Request request ) { + int retval = -1; + int result = __fenix_mpi_wait(&(request.mpi_recv_req)); + + if (result != MPI_SUCCESS) { + retval = FENIX_SUCCESS; + } else { + retval = FENIX_ERROR_DATA_WAIT; + } + + result = __fenix_mpi_wait(&(request.mpi_send_req)); + + if (result != MPI_SUCCESS) { + retval = FENIX_SUCCESS; + } else { + retval = FENIX_ERROR_DATA_WAIT; + } + + return retval; +} + +/** + * @brief + * @param request + * @param flag + */ +int __fenix_data_test(Fenix_Request request, int *flag) { + int retval = -1; + int result = ( __fenix_mpi_test(&(request.mpi_recv_req)) & __fenix_mpi_test(&(request.mpi_send_req))) ; + + if ( result == 1 ) { + *flag = 1; + retval = FENIX_SUCCESS; + } else { + *flag = 0 ; // incomplete error? + retval = FENIX_ERROR_DATA_WAIT; + } + return retval; + /* Good 2/10/17 */ +} + +/** + * @brief // TODO: implement FENIX_DATA_MEMBER_ALL + * @param group_id + * @param member_id + * @param subset_specifier + * + */ + +int member_store(int groupid, int memberid, const DataSubset& specifier) { + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_store: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; + } + + return group->member_store(memberid, specifier); +} + +int member_storev(int groupid, int memberid, const DataSubset& specifier) { + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_storev: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; + } + + return group->member_storev(memberid, specifier); +} + +/** + * @brief + * @param group_id + * @param member_id + * @param subset_specifier + * @param request + */ +int member_istore(int groupid, int memberid, const DataSubset& specifier, + Fenix_Request *request) { + fatal_print("unimplemented"); + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_istore: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; + } + + return group->member_istore(memberid, specifier, request); +} + +int member_istorev( + int groupid, int memberid, const DataSubset& specifier, Fenix_Request *request +) { + fatal_print("unimplemented"); + auto [group_index, group] = find_group(groupid); + if(!group){ + debug_print("ERROR Fenix_Data_member_istore: group_id <%d> does not exist", groupid); + return FENIX_ERROR_INVALID_GROUPID; + } + + return group->member_istore(memberid, specifier, request); +} + + +/** + * @brief + * @param group_id + * @param time_stamp + */ +int commit(int groupid, int *timestamp) { + /* No communication is performed */ + /* Return the new timestamp */ + int retval = -1; + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); + if (fenix.options.verbose == 22) { + verbose_print("c-rank: %d, role: %d, group_index: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); + } + if (group_index == -1) { + debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + + if (group->timestamp != -1) group->timestamp++; + else group->timestamp = group->timestart; + 
+ group->commit(); + + if (timestamp != NULL) { + *timestamp = group->timestamp; + } + + retval = FENIX_SUCCESS; + } + return retval; +} + +/** + * @brief + * @param group_id + * @param time_stamp + */ +int commit_barrier(int groupid, int *timestamp) { + int retval = -1; + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); + if (fenix.options.verbose == 23) { + verbose_print("c-rank: %d, role: %d, group_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); + } + if (group_index == -1) { + debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + + //We want to make sure there aren't any failed MPI operations (IE unfinished stores) + //But we don't want to fail to commit if a failure has happened since a successful store. + int old_failure_handling = fenix.ignore_errs; + fenix.ignore_errs = 1; + + int can_commit = 0; + + //We'll use comm_agree as a resilient barrier + //Our error handler also enters an agree, with a unique location bit set. + //So if we aren't all here, we've hit an error already. + + int location = FENIX_DATA_COMMIT_BARRIER_LOC; + int ret = MPIX_Comm_agree(*fenix.user_world, &location); + if(location == FENIX_DATA_COMMIT_BARRIER_LOC) can_commit = 1; + + fenix.ignore_errs = old_failure_handling; + + if(can_commit == 1){ + if (group->timestamp != -1) group->timestamp++; + else group->timestamp = group->timestart; + retval = group->commit(); + } + + + if(can_commit != 1 || ret != MPI_SUCCESS) { + //A rank failure has happened, lets trigger error handling if enabled. + int throwaway = 1; + MPI_Allreduce(MPI_IN_PLACE, &throwaway, 1, MPI_INT, MPI_SUM, *fenix.user_world); + } + + + if (timestamp != NULL) { + *timestamp = group->timestamp; + } + } + return retval; +} + + +/** + * @brief + * @param group_id + * @param member_id + * @param data + * @param max_count + * @param time_stamp + */ +int member_restore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { + int retval = FENIX_SUCCESS; + data_found = {}; + + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); + + if (fenix.options.verbose == 25) { + int member_index = -1; + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); + verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, + member_index); + } + + if (group_index == -1) { + debug_print("ERROR Fenix_Data_member_restore: group_id <%d> does not exist\n", + groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + retval = group->member_restore(memberid, data, maxcount, timestamp, data_found); + } + return retval; +} + +/** + * @brief + * @param group_id + * @param member_id + * @param data + * @param max_count + * @param time_stamp + */ +int member_lrestore(int groupid, int memberid, void *data, int maxcount, int timestamp, DataSubset& data_found) { + int retval = FENIX_SUCCESS; + data_found = {}; + + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); + int member_index = -1; + + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); + + + if (fenix.options.verbose == 25) { + verbose_print("c-rank: %d, role: %d, 
group_index: %d, member_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, + member_index); + } + + if (group_index == -1) { + debug_print("ERROR Fenix_Data_member_lrestore: group_id <%d> does not exist\n", + groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + retval = group->member_lrestore(memberid, data, maxcount, timestamp, data_found); + } + return retval; +} + +/** + * @brief + * @param group_id + * @param member_id + * @param target_buffer + * @param max_count + * @param time_stamp + * @param source_rank + */ +int __fenix_member_restore_from_rank(int groupid, int memberid, void *target_buffer, + int max_count, int time_stamp, int source_rank) { + int retval = FENIX_SUCCESS; + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery); + int member_index = -1; + + if(group_index != -1) member_index = __fenix_search_memberid(fenix.data_recovery->group[group_index], memberid); + + if (fenix.options.verbose == 25) { + verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, + member_index); + } + + if (group_index == -1) { + debug_print("ERROR Fenix_Data_member_restore: group_id <%d> does not exist\n", + groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + retval = group->member_restore_from_rank(memberid, target_buffer, + max_count, time_stamp, source_rank); + } + return retval; +} + + + +/** + * @brief + * @param group_id + * @param num_members + */ +int __fenix_get_number_of_members(int group_id, int *num_members) { + auto group = find_group(group_id).second; + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + *num_members = group->members.size(); + return FENIX_SUCCESS; +} + +/** + * @brief + * @param group_id + * @param member_id + * @param position + */ +int __fenix_get_member_at_position(int group_id, int *member_id, int position) { + auto [group_index, group] = find_group(group_id); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + if(position < 0 || position >= group->members.size()){ + debug_print( + "ERROR Fenix_Data_group_get_member_at_position: position <%d> must be a value between 0 and number_of_members-1 \n", + position); + return FENIX_ERROR_INVALID_POSITION; + } + auto iter = group->members.begin(); + std::advance(iter, position); + *member_id = iter->first; + return FENIX_SUCCESS; +} + +std::optional> group_members(int group_id){ + auto [group_index, group] = find_group(group_id); + if(!group) return {}; + return group->get_member_ids(); +} + +std::optional> group_snapshots(int group_id){ + auto [group_index, group] = find_group(group_id); + if(!group) return {}; + return group->get_snapshots(); +} + +/** + * @brief + * @param group_id + * @param num_snapshots + */ +int __fenix_get_number_of_snapshots(int group_id, int *num_snapshots) { + int retval = -1; + int group_index = __fenix_search_groupid(group_id, fenix.data_recovery ); + if (group_index == -1) { + debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", group_id); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + retval = group->get_number_of_snapshots(num_snapshots); + } + return retval; +} + +/** + * @brief + * @param group_id + * @param position + * @param time_stamp + */ +int __fenix_get_snapshot_at_position(int groupid, int 
position, int *timestamp) { + int retval = -1; + int group_index = __fenix_search_groupid(groupid, fenix.data_recovery ); + if (fenix.options.verbose == 33) { + verbose_print("c-rank: %d, role: %d, group_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index); + } + if (group_index == -1) { + debug_print("ERROR Fenix_Data_commit: group_id <%d> does not exist\n", groupid); + retval = FENIX_ERROR_INVALID_GROUPID; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + *timestamp = group->timestamp - position; + } + return retval; +} + +/** + * @brief + * @param group_id + * @param member_id + * @param attribute_name + * @param attribute_value + * @param flag + * @param source_rank + */ +int __fenix_member_get_attribute(int groupid, int memberid, int attributename, + void *attributevalue, int *flag, int sourcerank) { + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + auto [member_index, mentry] = group->find_member(memberid); + if(!mentry) return FENIX_ERROR_INVALID_MEMBERID; + + if (fenix.options.verbose == 34) { + verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, + member_index); + } + + return group->member_get_attribute(mentry, attributename, + attributevalue, flag, sourcerank); +} + +/** + * @brief + * @param group_id + * @param member_id + * @param attribute_name + * @param attribute_value + * @param flag + */ +int __fenix_member_set_attribute(int groupid, int memberid, int attributename, + void *attributevalue, int *flag) { + auto [group_index, group] = find_group(groupid); + if(!group) return FENIX_ERROR_INVALID_GROUPID; + + auto [member_index, mentry] = group->find_member(memberid); + if(!mentry) return FENIX_ERROR_INVALID_MEMBERID; + + if (fenix.options.verbose == 35) { + verbose_print("c-rank: %d, role: %d, group_index: %d, member_index: %d\n", + __fenix_get_current_rank(fenix.new_world), fenix.role, group_index, + member_index); + } + + int my_datatype_size; + int myerr; + + //Always pass attribute changes along to group - they might have unknown attributes + //or side-effects to handle from changes. They get change info before + //changes are made, in case they need prior state. + int retval = group->member_set_attribute(mentry, attributename, + attributevalue, flag); + + switch (attributename) { + case FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER: + mentry->user_data = (char*)attributevalue; + break; + case FENIX_DATA_MEMBER_ATTRIBUTE_COUNT: + mentry->current_count = *((int *) (attributevalue)); + retval = FENIX_SUCCESS; + break; + case FENIX_DATA_MEMBER_ATTRIBUTE_DATATYPE: + + myerr = MPI_Type_size(*((MPI_Datatype *)(attributevalue)), &my_datatype_size); + + if( myerr ) { + debug_print( + "ERROR Fenix_Data_member_attr_get: Fenix currently does not support this MPI_DATATYPE; invalid attribute_value <%d>\n", + attributevalue); + retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; + } + + mentry->datatype_size = my_datatype_size; + retval = FENIX_SUCCESS; + break; + + default: + //Only an issue if the policy also doesn't have this attribute. 
+ if(retval){ + debug_print("ERROR Fenix_Data_member_attr_get: invalid attribute_name <%d>\n", + attributename); + retval = FENIX_ERROR_INVALID_ATTRIBUTE_NAME; + } + break; + } + + return retval; +} + +/** + * @brief + * @param group_id + * @param time_stamp + */ +int snapshot_delete(int group_id, int time_stamp) { + int retval = -1; + int group_index = __fenix_search_groupid(group_id, fenix.data_recovery ); + if (group_index == -1) { + debug_print("ERROR Fenix_Data_snapshot_delete: group_id <%d> does not exist\n", + group_id); + retval = FENIX_ERROR_INVALID_GROUPID; + } else if (time_stamp < 0) { + debug_print( + "ERROR Fenix_Data_snapshot_delete: time_stamp <%d> must be greater than zero\n", + time_stamp); + retval = FENIX_ERROR_INVALID_TIMESTAMP; + } else { + fenix_group_t *group = (fenix.data_recovery->group[group_index]); + retval = group->snapshot_delete(time_stamp); + } + return retval; +} + +} // namespace Fenix::Data diff --git a/src/fenix_data_subset.c b/src/fenix_data_subset.c deleted file mode 100644 index ffbb864..0000000 --- a/src/fenix_data_subset.c +++ /dev/null @@ -1,809 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// -// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| -// _| _| _|_| _| _| _| _| -// _|_|_| _|_|_| _| _| _| _| _| -// _| _| _| _|_| _| _| _| -// _| _|_|_|_| _| _| _|_|_| _| _| -// -// -// -// -// Copyright (C) 2016 Rutgers University and Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, -// Michael Heroux, and Matthew Whitlock -// -// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and -// Marc Gamell (mgamell@cac.rutgers.edu) -// -// ************************************************************************ -//@HEADER -*/ - -#include "mpi.h" -#include "fenix-config.h" -#include "fenix_ext.h" -#include "fenix_data_subset.h" - - -int __fenix_data_subset_init(int num_blocks, Fenix_Data_subset* subset){ - int retval = -1; - if(num_blocks <= 0){ - debug_print("ERROR __fenix_data_subset_init: num_regions <%d> must be positive\n", - num_blocks); - } else { - subset->start_offsets = (int*) s_malloc(sizeof(int) * num_blocks); - subset->end_offsets = (int*) s_malloc(sizeof(int) * num_blocks); - subset->num_repeats = (int*) s_calloc(num_blocks, sizeof(int)); - subset->num_blocks = num_blocks; - retval = FENIX_SUCCESS; - } - return retval; -} - -/** - * @brief - * @param num_blocks - * @param start_offset - * @param end_offset - * @param stride - * @param subset_specifier - * - * This routine creates - */ -int __fenix_data_subset_create(int num_blocks, int start_offset, int end_offset, int stride, - Fenix_Data_subset *subset_specifier) { - int retval = -1; - if (num_blocks <= 0) { - debug_print("ERROR Fenix_Data_subset_create: num_blocks <%d> must be positive\n", - num_blocks); - retval = FENIX_ERROR_SUBSET_NUM_BLOCKS; - } else if (start_offset < 0) { - debug_print("ERROR Fenix_Data_subset_create: start_offset <%d> must be positive\n", - start_offset); - retval = FENIX_ERROR_SUBSET_START_OFFSET; - } else if (end_offset < 0) { - debug_print("ERROR Fenix_Data_subset_create: end_offset <%d> must be positive\n", - end_offset); - retval = FENIX_ERROR_SUBSET_END_OFFSET; - } else if (stride <= 0) { - debug_print("ERROR Fenix_Data_subset_create: stride <%d> must be positive\n", stride); - retval = FENIX_ERROR_SUBSET_STRIDE; - } else { - //This is a simple subset with a single region descriptor that simply - //repeats num_blocks times. 
- __fenix_data_subset_init(1 /*Only 1 block, repeated*/, subset_specifier); - - subset_specifier->start_offsets[0] = start_offset; - subset_specifier->end_offsets[0] = end_offset; - subset_specifier->num_repeats[0] = num_blocks-1; - subset_specifier->stride = stride; - subset_specifier->specifier = __FENIX_SUBSET_CREATE; - retval = FENIX_SUCCESS; - } - return retval; -} - -/** - * @brief - * @param num_blocks - * @param array_start_offsets - * @param array_end_offsets - * @param subset_specifier - */ -int __fenix_data_subset_createv(int num_blocks, int *array_start_offsets, int *array_end_offsets, - Fenix_Data_subset *subset_specifier) { - - int retval = -1; - if (num_blocks <= 0) { - debug_print("ERROR Fenix_Data_subset_createv: num_blocks <%d> must be positive\n", - num_blocks); - retval = FENIX_ERROR_SUBSET_NUM_BLOCKS; - } else if (array_start_offsets == NULL) { - debug_print( "ERROR Fenix_Data_subset_createv: array_start_offsets %s must be at least of size 1\n", ""); - retval = FENIX_ERROR_SUBSET_START_OFFSET; - } else if (array_end_offsets == NULL) { - debug_print( "ERROR Fenix_Data_subset_createv: array_end_offsets %s must at least of size 1\n", ""); - retval = FENIX_ERROR_SUBSET_END_OFFSET; - } else { - - // first check that the start offsets and end offsets are valid - int index; - int invalid_index = -1; - int found_invalid_index = 0; - for (index = 0; found_invalid_index != 1 && (index < num_blocks); index++) { - if (array_start_offsets[index] > array_end_offsets[index]) { - invalid_index = index; - found_invalid_index = 1; - } - } - - if (found_invalid_index != 1) { // if not true (!= 1) - __fenix_data_subset_init(num_blocks, subset_specifier); - - memcpy(subset_specifier->start_offsets, array_start_offsets, ( num_blocks * sizeof(int))); // deep copy - memcpy(subset_specifier->end_offsets, array_end_offsets, ( num_blocks * sizeof(int))); // deep copy - - subset_specifier->specifier = __FENIX_SUBSET_CREATEV; - subset_specifier->stride = 0; - retval = FENIX_SUCCESS; - } else { - debug_print( - "ERROR Fenix_Data_subset_createv: array_end_offsets[%d] must be less than array_start_offsets[%d]\n", - invalid_index, invalid_index); - retval = FENIX_ERROR_SUBSET_END_OFFSET; - } - } - return retval; -} - -//This should only be used to copy to a currently non-inited subset -// If the destination already has memory allocated in the num_blocks/offsets regions -// then this can lead to memory leaks. -// For the sake of consistent memory management, this will always return a subset with -// a valid memory allocation for each pointer in the subset. -void __fenix_data_subset_deep_copy(Fenix_Data_subset* from, Fenix_Data_subset* to){ - if(from->specifier == __FENIX_SUBSET_FULL || from->specifier == __FENIX_SUBSET_EMPTY){ - __fenix_data_subset_init(1, to); - to->specifier = from->specifier; - } else { - __fenix_data_subset_init(from->num_blocks, to); - memcpy(to->num_repeats, from->num_repeats, to->num_blocks*sizeof(int)); - memcpy(to->start_offsets, from->start_offsets, to->num_blocks*sizeof(int)); - memcpy(to->end_offsets, from->end_offsets, to->num_blocks*sizeof(int)); - to->specifier = from->specifier; - to->stride = from->stride; - } -} - -//This function checks for any overlapping regions and removes them. -void __fenix_data_subset_simplify_regions(Fenix_Data_subset* ss){ - int space_allocated = ss->num_blocks; - - if(ss->specifier == __FENIX_SUBSET_CREATE){ - //We will handle this by viewing the data as regions of size stride. 
- //Each block will be broken into a value dictating which regions it is - //within, and what data within each region it is within. - // - //If two blocks do not overlap within regions, there is no overlap. - //If they overlap within regions, but the regions they touch do not overlap, - //there is no overlap. etc. - - for(int i = 0; i < ss->num_blocks-1; i++){ - int did_merge = 0; - - for(int j = i+1; j < ss->num_blocks; j++){ - //We will simplify the logic by switching from i and j referencing - //to viewing the two blocks in the order that they exist in the data. - int first_block; - int second_block; - - if(ss->start_offsets[i] < ss->start_offsets[j]){ - first_block = i; - second_block = j; - } else { - first_block = j; - second_block = i; - } - - //Check for the case that the merged and unmerged regions are the same. - int merged_same_as_unmerged = ((ss->start_offsets[first_block]%ss->stride) == (ss->start_offsets[second_block]%ss->stride) - && (ss->end_offsets[first_block]%ss->stride) == (ss->end_offsets[second_block]%ss->stride)); - int merged_same_as_first = 0, merged_same_as_second = 0; - - - // We want the smallest x | (first_block_end + stride * x >= second_block_start) - // As this gives us which repetition an overlap is first possible on. - // Simplify to x >= (second_block_start - first_block_end)/s - // We want the lowest, so swap >= with =, and since we need an integer we'll round up. - int first_intersecting_repetition, option2; - if(ss->start_offsets[second_block] - ss->end_offsets[first_block] > 0){ - first_intersecting_repetition = (ss->start_offsets[second_block] - ss->end_offsets[first_block] - 1)/ss->stride + 1; - // = ceil( (ss->start_offsets[second_block] - ss->end_offsets[first_block]) / ss->stride) - } else { - first_intersecting_repetition = 0; - } - - // The above only accounts for one of two cases of intersection. There other is provided by option 2. - if(ss->end_offsets[second_block] - ss->end_offsets[first_block] > 0){ - option2 = (ss->end_offsets[second_block] - ss->end_offsets[first_block] - 1)/ss->stride + 1; - } else { - option2 = 0; - } - - if(merged_same_as_unmerged){ - //If there's no difference in merged/unmerged, we can 'skip' a stride - //and it'll al still be the same. - if(!( first_intersecting_repetition <= ss->num_repeats[first_block]+1 - || option2 <= ss->num_repeats[first_block]+1 )){ - //Both still require too high a repetition than we have. No overlap. - continue; - } - } else { - if(!( first_intersecting_repetition <= ss->num_repeats[first_block] - || option2 <= ss->num_repeats[first_block] )){ - //Both require too high a repetition than we have. No overlap. - continue; - } - } - - merged_same_as_first = ss->start_offsets[first_block] + ss->stride*first_intersecting_repetition <= ss->start_offsets[second_block] - && ss->end_offsets[first_block] + ss->stride*first_intersecting_repetition >= ss->end_offsets[second_block]; - merged_same_as_second = ss->start_offsets[second_block] + ss->stride*first_intersecting_repetition <= ss->start_offsets[first_block] - && ss->end_offsets[second_block] + ss->stride*first_intersecting_repetition >= ss->end_offsets[first_block]; - - //We have found the smallest overlap candidate, now we see if there is overlap there. 
- int blocks_overlap; - if(first_intersecting_repetition < option2){ - blocks_overlap = ( (ss->stride * first_intersecting_repetition + ss->start_offsets[first_block]) <= ss->start_offsets[second_block]); - } else { - first_intersecting_repetition = option2; - blocks_overlap = ( (ss->stride * first_intersecting_repetition + ss->start_offsets[first_block]) <= ss->end_offsets[second_block]); - } - - if(!blocks_overlap){ - continue; - } - - int length_first_only_start; - int length_first_only_end; - int length_both; - int length_second_only; - int merged_start; - int merged_end; - - - length_first_only_start = first_intersecting_repetition; - - length_first_only_start = length_first_only_start > (ss->num_repeats[i] + 1) ? - (ss->num_repeats[i] + 1) : length_first_only_start; - - int remaining_first_repetitions = ss->num_repeats[first_block] + 1 - length_first_only_start; - if(remaining_first_repetitions > ss->num_repeats[second_block]+1){ - length_both = ss->num_repeats[second_block] + 1; - length_second_only = 0; - length_first_only_end = ss->num_repeats[first_block]+1 - length_first_only_start - length_both; - } else { - length_both = remaining_first_repetitions; - length_second_only = ss->num_repeats[second_block]+1 - remaining_first_repetitions; - length_first_only_end = 0; - } - - if(merged_same_as_unmerged){ - length_both = length_both + length_first_only_end + length_first_only_start + length_second_only; - length_first_only_start = length_first_only_end = length_second_only = 0; - } else if(merged_same_as_first){ - length_both = length_both + length_first_only_end + length_first_only_start; - - length_first_only_start = length_first_only_end = 0; - } else if(merged_same_as_second){ - length_both = length_both + length_both; - - length_both = 0; - } - - //Record info for merged region before we overwrite data we need. - merged_start = ss->stride*length_first_only_start + ss->start_offsets[first_block]; - merged_start = merged_start < ss->start_offsets[second_block] ? - merged_start : ss->start_offsets[second_block]; - - merged_end = ss->stride*length_first_only_start + ss->end_offsets[first_block]; - merged_end = merged_end < ss->end_offsets[second_block] ? - merged_end : ss->end_offsets[second_block]; - - //Now we know what the overlap is, so we make the changes to the data subset. - int store_index = 0; - int store_locations[3] = {first_block, second_block, ss->num_blocks}; - - if(length_first_only_start > 0){ - ss->num_repeats[first_block] = length_first_only_start - 1; - store_index++; - } - if(length_first_only_end > 0){ - ss->num_repeats[store_locations[store_index]] = length_first_only_end-1; - ss->start_offsets[store_locations[store_index]] = ss->stride*(length_first_only_start+length_both) + - ss->start_offsets[first_block]; - ss->end_offsets[store_locations[store_index]] = ss->stride*(length_first_only_start+length_both) + - ss->end_offsets[first_block]; - store_index++; - } else if(length_second_only > 0){ - ss->num_repeats[store_locations[store_index]] = length_second_only-1; - ss->start_offsets[store_locations[store_index]] = ss->stride*(length_both) + - ss->start_offsets[second_block]; - ss->end_offsets[store_locations[store_index]] = ss->stride*(length_both) + - ss->end_offsets[second_block]; - store_index++; - } - - //There is always a merged region to add. - if(store_index == 2){ - //We're adding a new block, so we need to make sure we have allocated - //enough memory space. 
- ss->num_blocks++; - if(ss->num_blocks > space_allocated){ - - ss->end_offsets = (int*) s_realloc(ss->end_offsets, - (space_allocated * 2) * sizeof(int)); - ss->start_offsets = (int*) s_realloc(ss->start_offsets, - (space_allocated * 2) * sizeof(int)); - ss->num_repeats = (int*) s_realloc(ss->num_repeats, - (space_allocated * 2) * sizeof(int)); - space_allocated *= 2; - } - - } - ss->start_offsets[store_locations[store_index]] = merged_start; - ss->end_offsets[store_locations[store_index]] = merged_end; - ss->num_repeats[store_locations[store_index]] = length_both - 1; - store_index++; - - - //Check if num_repeats[second_block] < 0, if so remove it. - //This could occur if both blocks can be perfectly minimized to a single block. - if(store_index == 1){ - if(second_block == ss->num_blocks-1){ - //Don't need to move anything. - ss->num_blocks--; - } else { - //We need to move everything over by one. - memmove(ss->num_repeats + second_block, ss->num_repeats + second_block + 1, - ss->num_blocks - second_block - 1); - memmove(ss->start_offsets + second_block, ss->start_offsets + second_block + 1, - ss->num_blocks - second_block - 1); - memmove(ss->end_offsets + second_block, ss->end_offsets + second_block + 1, - ss->num_blocks - second_block - 1); - ss->num_blocks--; - } - } - - did_merge = 1; - } - - //If we merged w/ anything, recheck w/ new merged block. - if(did_merge) i--; - } - } else if(ss->specifier == __FENIX_SUBSET_CREATEV){ - //This is much simpler than with CREATE type, since we don't have to - //worry about repetition. - for(int i = 0; i < ss->num_blocks-1; i++){ - int did_merge = 0; - - for(int j = i+1; j < ss->num_blocks; j++){ - if( ss->start_offsets[i] <= ss->end_offsets[j]+1 && - ss->end_offsets[i] >= ss->start_offsets[j]-1){ - did_merge = 1; - - ss->start_offsets[i] = (ss->start_offsets[i] < ss->start_offsets[j]) ? - ss->start_offsets[i] : - ss->start_offsets[j]; - - ss->end_offsets[i] = (ss->end_offsets[i] > ss->end_offsets[j]) ? - ss->end_offsets[i] : - ss->end_offsets[j]; - - //Move everything over to remove j - memmove(ss->start_offsets + j, ss->start_offsets + j + 1, - (ss->num_blocks - j - 1) * sizeof(int)); - memmove(ss->end_offsets + j, ss->end_offsets + j + 1, - (ss->num_blocks - j - 1) * sizeof(int)); - ss->num_blocks--; - } - } - - if(did_merge) i--; - } - } - - if(space_allocated > ss->num_blocks){ - ss->end_offsets = (int*) s_realloc(ss->end_offsets, - ss->num_blocks * sizeof(int)); - ss->start_offsets = (int*) s_realloc(ss->start_offsets, - ss->num_blocks * sizeof(int)); - ss->num_repeats = (int*) s_realloc(ss->num_repeats, - ss->num_blocks * sizeof(int)); - } - -} - -//This should only be used to copy to a currently non-inited subset -// If the destination already has memory allocated in the num_blocks/offsets regions -// then this can lead to double-mallocs or memory leaks. -void __fenix_data_subset_merge(Fenix_Data_subset* first_subset, Fenix_Data_subset* second_subset, - Fenix_Data_subset* output){ - - //Simple cases first - if(first_subset->specifier == __FENIX_SUBSET_FULL || - second_subset->specifier == __FENIX_SUBSET_FULL){ - //We don't need to populate anything else. - output->specifier = __FENIX_SUBSET_FULL; - //We still have to init, else there will be a memory error when the user tries to free later. 
- __fenix_data_subset_init(1, output); - - } else if(first_subset->specifier == __FENIX_SUBSET_EMPTY){ - __fenix_data_subset_deep_copy(second_subset, output); - - } else if(second_subset->specifier == __FENIX_SUBSET_EMPTY){ - __fenix_data_subset_deep_copy(first_subset, output); - - } else if(first_subset->specifier == __FENIX_SUBSET_CREATE && - second_subset->specifier == __FENIX_SUBSET_CREATE && - first_subset->stride == second_subset->stride){ - //Output is just a CREATE type with combined descriptors. - //Start by making a list of all descriptors, then merge any with overlaps. - output->stride = first_subset->stride; - output->num_blocks = first_subset->num_blocks - + second_subset->num_blocks; - __fenix_data_subset_init(output->num_blocks, output); - output->specifier = __FENIX_SUBSET_CREATE; - - memcpy(output->num_repeats, first_subset->num_repeats, first_subset->num_blocks * sizeof(int)); - memcpy(output->num_repeats+first_subset->num_blocks, second_subset->num_repeats, - second_subset->num_blocks * sizeof(int)); - - memcpy(output->start_offsets, first_subset->start_offsets, first_subset->num_blocks * sizeof(int)); - memcpy(output->start_offsets+first_subset->num_blocks, second_subset->start_offsets, - second_subset->num_blocks * sizeof(int)); - - memcpy(output->end_offsets, first_subset->end_offsets, first_subset->num_blocks * sizeof(int)); - memcpy(output->end_offsets+first_subset->num_blocks, second_subset->end_offsets, - second_subset->num_blocks * sizeof(int)); - - //Now we have all of the regions, so we just need to simplify them. - __fenix_data_subset_simplify_regions(output); - } else { - output->specifier = __FENIX_SUBSET_CREATEV; - - output->num_blocks = first_subset->num_blocks + second_subset->num_blocks; - if(first_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < first_subset->num_blocks; i++){ - output->num_blocks += first_subset->num_repeats[i]; - } - } - if(second_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < second_subset->num_blocks; i++){ - output->num_blocks += second_subset->num_repeats[i]; - } - } - - __fenix_data_subset_init(output->num_blocks, output); - - int index = 0; - for(int i = 0; i < first_subset->num_blocks; i++){ - for(int j = 0; j <= first_subset->num_repeats[i]; j++){ - output->start_offsets[index] = j*first_subset->stride + first_subset->start_offsets[i]; - output->end_offsets[index] = j*first_subset->stride + first_subset->end_offsets[i]; - index++; - } - } - for(int i = 0; i < second_subset->num_blocks; i++){ - for(int j = 0; j <= second_subset->num_repeats[i]; j++){ - output->start_offsets[index] = j*second_subset->stride + second_subset->start_offsets[i]; - output->end_offsets[index] = j*second_subset->stride + second_subset->end_offsets[i]; - index++; - } - } - - //Now we have all of the regions, so we just need to simplify them. - __fenix_data_subset_simplify_regions(output); - } -} - -//Merge second subset into first subset -//This reasonably assumes both subsets are already initialized. -void __fenix_data_subset_merge_inplace(Fenix_Data_subset* first_subset, Fenix_Data_subset* second_subset){ - - //Simple cases first - if(first_subset->specifier == __FENIX_SUBSET_FULL || - second_subset->specifier == __FENIX_SUBSET_FULL){ - //We don't need to populate anything else. - first_subset->specifier = __FENIX_SUBSET_FULL; - - } else if(second_subset->specifier == __FENIX_SUBSET_EMPTY){ - //Do nothing. 
- - } else if(first_subset->specifier == __FENIX_SUBSET_EMPTY){ - //Deep copy requires that the destination be non-initialized, so free sub1 first. - __fenix_data_subset_free(first_subset); - __fenix_data_subset_deep_copy(second_subset, first_subset); - - } else if(first_subset->specifier == __FENIX_SUBSET_CREATE && - second_subset->specifier == __FENIX_SUBSET_CREATE && - first_subset->stride == second_subset->stride){ - //Output is just a CREATE type with combined descriptors. - //Start by making a list of all descriptors, then merge any with overlaps. - first_subset->num_repeats = (int*)s_realloc(first_subset->num_repeats, - (first_subset->num_blocks + second_subset->num_blocks)*sizeof(int)); - first_subset->start_offsets = (int*)s_realloc(first_subset->start_offsets, - (first_subset->num_blocks + second_subset->num_blocks)*sizeof(int)); - first_subset->end_offsets = (int*)s_realloc(first_subset->end_offsets, - (first_subset->num_blocks + second_subset->num_blocks)*sizeof(int)); - - memcpy(first_subset->num_repeats+first_subset->num_blocks, second_subset->num_repeats, - second_subset->num_blocks * sizeof(int)); - - memcpy(first_subset->start_offsets+first_subset->num_blocks, second_subset->start_offsets, - second_subset->num_blocks * sizeof(int)); - - memcpy(first_subset->end_offsets+first_subset->num_blocks, second_subset->end_offsets, - second_subset->num_blocks * sizeof(int)); - - first_subset->num_blocks = first_subset->num_blocks - + second_subset->num_blocks; - - //Now we have all of the regions, so we just need to simplify them. - __fenix_data_subset_simplify_regions(first_subset); - } else { - - int new_num_blocks = first_subset->num_blocks + second_subset->num_blocks; - if(first_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < first_subset->num_blocks; i++){ - new_num_blocks += first_subset->num_repeats[i]; - } - } - if(second_subset->specifier == __FENIX_SUBSET_CREATE){ - for(int i = 0; i < second_subset->num_blocks; i++){ - new_num_blocks += second_subset->num_repeats[i]; - } - } - - first_subset->num_repeats = (int*)s_realloc(first_subset->num_repeats, new_num_blocks*sizeof(int)); - first_subset->start_offsets = (int*)s_realloc(first_subset->start_offsets, new_num_blocks*sizeof(int)); - first_subset->end_offsets = (int*)s_realloc(first_subset->end_offsets, new_num_blocks*sizeof(int)); - - //work backwards to prevent overwriting current data. - - int index = new_num_blocks-1; - for(int i = second_subset->num_blocks-1; i >= 0; i--){ - for(int j = 0; j <= second_subset->num_repeats[i]; j++){ - first_subset->start_offsets[index] = j*second_subset->stride - + second_subset->start_offsets[i]; - first_subset->end_offsets[index] = j*second_subset->stride - + second_subset->end_offsets[i]; - first_subset->num_repeats[index] = 0; - index--; - } - } - for(int i = first_subset->num_blocks-1; i >= 0; i--){ - for(int j = 0; j <= first_subset->num_repeats[i]; j++){ - first_subset->start_offsets[index] = j*first_subset->stride - + first_subset->start_offsets[i]; - first_subset->end_offsets[index] = j*first_subset->stride - + first_subset->end_offsets[i]; - first_subset->num_repeats[index] = 0; - index--; - } - } - first_subset->specifier = __FENIX_SUBSET_CREATEV; - first_subset->num_blocks = new_num_blocks; - - //Now we have all of the regions, so we just need to simplify them. 
- __fenix_data_subset_simplify_regions(first_subset); - } - -} - - -void __fenix_data_subset_copy_data(Fenix_Data_subset* ss, void* dest, void* src, size_t data_type_size, size_t max_size){ - if(ss->specifier == __FENIX_SUBSET_FULL){ - memcpy(dest, src, max_size*data_type_size); - } else if(ss->specifier != __FENIX_SUBSET_EMPTY){ - for(int i = 0; i < ss->num_blocks; i++){ - //Inclusive both directions, so add 1. - int length = ss->end_offsets[i]-ss->start_offsets[i] + 1; - - for(int j = 0; j <= ss->num_repeats[i]; j++){ - int start = ss->start_offsets[i] + j*ss->stride; - memcpy( ((uint8_t*)dest) + start*data_type_size, ((uint8_t*)src) + start*data_type_size, length*data_type_size); - } - } - } -} - -int __fenix_data_subset_data_size(Fenix_Data_subset* ss, size_t max_size){ - int size; - - if(ss->specifier == __FENIX_SUBSET_FULL){ - size = max_size; - } else if( ss->specifier == __FENIX_SUBSET_EMPTY){ - size = 0; - } else { - size = 0; - for(int i = 0; i < ss->num_blocks; i++){ - size += (ss->end_offsets[i] - ss->start_offsets[i] + 1)*(ss->num_repeats[i]+1); - } - } - - return size; -} - -int __fenix_data_subset_is_full(Fenix_Data_subset *ss, size_t data_length){ - //Assumes a "simplified" subset which has all mergeable regions merged. - return (ss->specifier == __FENIX_SUBSET_FULL) || - ( (ss->start_offsets[0] == 0) && (ss->end_offsets[0] == data_length-1) ); -} - -//Makes an array with the in-order contents of subset ss of src. -//size is updated to the size of the serialized array, which is returned as the function's return. -//User's responsibility to free the returned array. -void* __fenix_data_subset_serialize(Fenix_Data_subset* ss, void* src, size_t type_size, size_t max_size, size_t* size){ - - void* dest; - - if(ss->specifier == __FENIX_SUBSET_FULL){ - dest = malloc(type_size*max_size); - - memcpy(dest, src, type_size*max_size); - - *size = max_size; - - } else if(ss->specifier == __FENIX_SUBSET_EMPTY) { - - dest = NULL; - size = 0; - - } else { - //First, count up the number of entries to find a size. - *size = __fenix_data_subset_data_size(ss, max_size); - - dest = malloc(type_size * (*size)); - - int* current_repetition = (int*) s_calloc(ss->num_blocks, sizeof(int)); - //We need to be sure to go in the right order. - int stored = 0; - while(stored < *size){ - int lowest_index = -1; - int lowest_block = -1; - for(int i = 0; i < ss->num_blocks; i++){ - if(current_repetition[i] <= ss->num_repeats[i]){ - if(lowest_index == -1 || - (lowest_index > ss->start_offsets[i]+ss->stride*current_repetition[i])){ - lowest_index = ss->start_offsets[i] + ss->stride*current_repetition[i]; - lowest_block = i; - } - } - } - - memcpy(((uint8_t*)dest)+stored*type_size, ((uint8_t*)src)+lowest_index*type_size, - type_size*(ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1) ); - stored += ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1; - current_repetition[lowest_block]++; - } - - free(current_repetition); - - } - - - return dest; -} - -void __fenix_data_subset_deserialize(Fenix_Data_subset* ss, void* src, void* dest, size_t max_size, size_t type_size){ - if(ss->specifier == __FENIX_SUBSET_FULL){ - memcpy(dest, src, type_size*max_size); - - } else if(ss->specifier != __FENIX_SUBSET_EMPTY){ - //First, count up the number of entries to find a size. - int size = __fenix_data_subset_data_size(ss, max_size); - - int* current_repetition = (int*) s_calloc(ss->num_blocks, sizeof(int)); - //We need to be sure to go in the right order. 
- int restored = 0; - while(restored < size){ - int lowest_index = -1; - int lowest_block = -1; - for(int i = 0; i < ss->num_blocks; i++){ - if(current_repetition[i] <= ss->num_repeats[i]){ - if(lowest_index == -1 || - (lowest_index > ss->start_offsets[i]+ss->stride*current_repetition[i])){ - lowest_index = ss->start_offsets[i] + ss->stride*current_repetition[i]; - lowest_block = i; - } - } - } - - memcpy(((uint8_t*)dest)+lowest_index*type_size, ((uint8_t*)src)+restored*type_size, - type_size*(ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1) ); - restored += ss->end_offsets[lowest_block]-ss->start_offsets[lowest_block]+1; - current_repetition[lowest_block]++; - } - - free(current_repetition); - } - -} - -void __fenix_data_subset_send(Fenix_Data_subset* ss, int dest, int tag, MPI_Comm comm){ - int* toSend = (int*)malloc(sizeof(int) * (3 + 3*ss->num_blocks)); - toSend[0] = ss->num_blocks; - - for(int i = 0; i < ss->num_blocks; i++){ - toSend[1+3*i] = ss->start_offsets[i]; - toSend[2+3*i] = ss->end_offsets[i]; - toSend[3+3*i] = ss->num_repeats[i]; - } - - toSend[1+3*ss->num_blocks] = ss->stride; - toSend[2+3*ss->num_blocks] = ss->specifier; - - MPI_Send((void*)toSend, 3*ss->num_blocks + 3, MPI_INT, dest, tag, comm); - free(toSend); -} - -void __fenix_data_subset_recv(Fenix_Data_subset* ss, int src, int tag, MPI_Comm comm){ - MPI_Status status; - MPI_Probe(src, tag, comm, &status); - - int size; - MPI_Get_count(&status, MPI_INT, &size); - - int *recvd = (int*)malloc(sizeof(int) * size); - MPI_Recv((void*)recvd, size, MPI_INT, src, tag, comm, NULL); - - __fenix_data_subset_init(recvd[0], ss); - for(int i = 0; i < ss->num_blocks; i++){ - ss->start_offsets[i] = recvd[1+3*i]; - ss->end_offsets[i] = recvd[2+3*i]; - ss->num_repeats[i] = recvd[3+3*i]; - } - ss->stride = recvd[1+3*ss->num_blocks]; - ss->specifier = recvd[2+3*ss->num_blocks]; - - free(recvd); -} - - -int __fenix_data_subset_free( Fenix_Data_subset *subset_specifier ) { - int retval = FENIX_SUCCESS; - if(subset_specifier->specifier == __FENIX_SUBSET_UNDEFINED){ - fprintf(stderr, "Detected double free of subset!\n"); - } - free( subset_specifier->num_repeats ); - free( subset_specifier->start_offsets ); - free( subset_specifier->end_offsets ); - subset_specifier->specifier = __FENIX_SUBSET_UNDEFINED; - return retval; -} - -/** - * @brief - * @param subset_specifier - */ -int __fenix_data_subset_delete( Fenix_Data_subset *subset_specifier ) { - __fenix_data_subset_free(subset_specifier); - free(subset_specifier); - return FENIX_SUCCESS; -} diff --git a/src/fenix_data_subset.cpp b/src/fenix_data_subset.cpp new file mode 100644 index 0000000..b88e223 --- /dev/null +++ b/src/fenix_data_subset.cpp @@ -0,0 +1,827 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include "mpi.h" +#include "fenix.h" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" +#include "fenix_data_subset.h" +#include "fenix_data_subset.hpp" + +namespace Fenix { +namespace Detail { + +std::pair DataRegion::range() const { + return {start, reps == MAX ? MAX : end+stride*reps}; +} + +size_t DataRegion::count() const { + if(end == MAX || reps == MAX) return MAX; + return (end-start+1)*(reps+1); +} + +bool DataRegion::operator==(const DataRegion& other) const { + return start == other.start && end == other.end && reps == other.reps && + stride == other.stride; +} + +bool DataRegion::operator&&(const DataRegion& b) const { + const auto& a = *this; + + auto [astart, aend] = a.range(); + auto [bstart, bend] = b.range(); + if(astart > bend || bstart > aend) return false; + else if(!a.reps || !b.reps) return true; + + //Both are strided. 
For now, only allow same-stride operations + fenix_assert(a.stride == b.stride); + + //Get a big modularly idempotent number to avoid negatives + size_t idem = (std::max(aend, bend)/stride + 1)*stride; + + //Start of possible overlap + size_t ostart = std::max(astart, bstart); + size_t arep = (ostart-astart)/stride; + size_t brep = (ostart-bstart)/stride; + DataRegion ablock = a.get_rep(arep); + DataRegion bblock = b.get_rep(brep); + + if(ablock && bblock) return true; + if(arep < a.reps && (bblock && a.get_rep(arep+1))) return true; + if(brep < b.reps && (ablock && b.get_rep(brep+1))) return true; + return false; +} + +bool DataRegion::operator<(const DataRegion& other) const { + if(start != other.start) return start < other.start; + if(end != other.end) return end < other.end; + if(reps != other.reps) return reps < other.reps; + return stride < other.stride; +} + +std::set DataRegion::operator&(const DataRegion& b) const { + fenix_assert(!reps || !b.reps); + if(b.reps) return b & *this; + + if(!(*this && b)){ + return {}; + } else if(!reps){ + return {DataRegion({std::max(start, b.start), std::min(end, b.end)})}; + } + + std::set ret; + + size_t start_rep = 0; + if(b.start > start){ + start_rep = (b.start-start)/stride; + auto block = get_rep(start_rep); + auto valid = block & b; + if(!valid.count(block)){ + ret.merge(valid); + if(start_rep == reps) return ret; + else start_rep++; + } + } + + size_t end_rep = reps; + if(b.end < range().second){ + end_rep = (b.end-start)/stride; + if(end_rep < start_rep) return ret; + + auto block = get_rep(end_rep); + auto valid = block & b; + if(!valid.count(block)){ + ret.merge(valid); + if(end_rep == start_rep) return ret; + else end_rep--; + } + } + + ret.insert(get_reps(start_rep, end_rep)); + return ret; +} + +DataRegion DataRegion::get_rep(size_t n) const { + fenix_assert(n <= reps); + return DataRegion({start+n*stride, end+n*stride}); +} + +DataRegion DataRegion::get_reps(size_t first, size_t last) const { + fenix_assert(first <= last); + fenix_assert(last <= reps); + return DataRegion( + {start+first*stride, end+first*stride}, last-first, stride + ); +} + +DataRegion DataRegion::inverted() const { + fenix_assert(reps); + return DataRegion({end+1, start+stride-1}, reps == MAX ? MAX : reps-1, stride); +} + +std::optional DataRegion::try_merge(const DataRegion& b) const { + if(b < *this){ + return b.try_merge(*this); + } else if(range().second == MAX){ + return {}; + } else if(!reps && !b.reps){ + if(end+1 == b.start) return DataRegion({start, b.end}); + else return {}; + } else { + size_t s = reps ? 
stride : b.stride; + if(start+s*(reps+1) == b.start && end+s*(reps+1) == b.end) + return DataRegion({start, end}, reps+b.reps+1, s); + else return {}; + } +} + +void merge_adjacent_sets(std::set& a, std::set& b){ + if(a.empty()){ + a = std::move(b); + b = {}; + } + while(!b.empty()){ + auto merged = (--a.end())->try_merge(*b.begin()); + if(!merged){ + a.merge(b); + b.clear(); + } else { + a.erase(a.end()--); + b.erase(b.begin()); + a.insert(*merged); + } + } + while(a.size() > 1){ + auto merged = (----a.end())->try_merge(*(--a.end())); + if(!merged) break; + a.erase(a.end()--); + a.erase(a.end()--); + a.insert(*merged); + } +} + +std::set DataRegion::operator-(const DataRegion& b) const { + if(!(*this && b)) return {*this}; + fenix_assert(!reps || !b.reps || stride == b.stride); + + const auto [bstart, bend] = b.range(); + + std::set ret; + if(bstart > 0) ret.merge(*this & DataRegion({0, bstart-1})); + if(b.reps){ + std::set mid; + auto inv = b.inverted(); + + //Get just the portions of myself that could be overlapping + auto parts = *this & DataRegion({bstart, bend}); + + //Handle any non-strided portions individually + for(auto it = parts.begin(); it != parts.end();){ + if(it->reps){ + it++; + continue; + } else { + mid.merge(*it & inv); + it = parts.erase(it); + } + } + + //Should have taken care of everything, or left only 1 strided region + if(!parts.empty()){ + fenix_assert(parts.size() == 1); + auto a = *parts.begin(); + //First, middle, and last repetitions may be effected differently + mid.merge(a.get_rep(0) & inv); + if(a.reps > 1){ + size_t middle_reps = a.reps-2; + auto middle_valid = a.get_rep(1) & inv; + for(auto block : middle_valid){ + fenix_assert(!block.reps); + mid.insert(DataRegion( + {block.start, block.end}, a.reps-2, b.stride + )); + } + } + mid.merge(a.get_rep(a.reps) & inv); + } + merge_adjacent_sets(ret, mid); + } + if(bend != MAX){ + auto post = *this & DataRegion({bend+1, MAX}); + merge_adjacent_sets(ret, post); + } + return ret; +} + +std::string DataRegion::str() const { + std::string ret = "["; + + if(start == MAX) ret += "MAX"; + else ret += std::to_string(start); + + ret += ","; + + if(end == MAX) ret += "MAX"; + else ret += std::to_string(end); + + ret += "]"; + + if(reps){ + ret += "x" + std::to_string(reps+1) + "s" + std::to_string(stride); + } + return ret; +} + +DataRegion full_region({0, DataRegion::MAX}); + +struct BlockIter { + using set_t = std::set; + using iter_t = typename set_t::iterator; + + BlockIter(set_t&& m_regions) + : region_holder(std::make_shared(std::move(m_regions))), + regions(*region_holder), it(regions.begin()), rep(0) { } + BlockIter(std::shared_ptr m_regions) + : region_holder(m_regions), + regions(*region_holder), it(regions.begin()), rep(0) { } + BlockIter(const BlockIter& other) + : region_holder(other.region_holder), regions(other.regions), it(other.it), rep(other.rep) { + fenix_assert(rep <= it->reps); + ++*this; + } + + DataRegion operator*(){ + return it->get_rep(rep); + } + + bool operator==(const BlockIter& other) const { + return it == other.it && rep == other.rep; + } + bool operator!=(const BlockIter& other) const { + return !(*this == other); + } + + BlockIter begin(){ + return BlockIter(region_holder); + } + + BlockIter end(){ + auto ret = begin(); + ret.it = regions.end(); + return ret; + } + + BlockIter& operator++(){ + if(rep == it->reps){ + ++it; + rep = 0; + } else { + ++rep; + } + return *this; + } + + std::shared_ptr region_holder; + + const set_t& regions; + iter_t it; + size_t rep; + +}; + +} // namespace 
Detail + +using namespace Detail; + +DataSubset::DataSubset(size_t end) : DataSubset({0, end}) { } + +DataSubset::DataSubset(std::pair bounds) + : DataSubset(bounds, 1, DataRegion::MAX) { }; + +DataSubset::DataSubset( + std::pair bounds, size_t n, size_t stride +) { + fenix_assert(bounds.first <= bounds.second, + "subset start (%lu) cannot be after end (%lu)", + bounds.first, bounds.second + ); + fenix_assert(n > 0, "num_blocks (%lu) must be positive", n); + fenix_assert(n == 1 || bounds.first+stride > bounds.second, + "stride %lu too low for region [%lu, %lu]", + stride, bounds.first, bounds.second + ); + + regions.emplace(bounds, n-1, stride); +} + +DataSubset::DataSubset(std::vector> bounds){ + for(const auto& b : bounds){ + fenix_assert(b.first <= b.second, + "subset start (%lu) cannot be after end (%lu)", b.first, b.second + ); + regions.emplace(b); + } + + //Simplify regions + merge_regions(); +} + +DataSubset::DataSubset(const DataSubset& a, const DataSubset& b){ + if(a.empty() || b.empty()){ + *this = a.empty() ? b : a; + return; + } + if(a.regions.count(full_region) || b.regions.count(full_region)){ + regions.insert(full_region); + return; + } + + size_t a_stride = 0, b_stride = 0; + for(const auto& r : a.regions) if(r.reps) a_stride = r.stride; + for(const auto& r : b.regions) if(r.reps) b_stride = r.stride; + + if(a_stride && b_stride && a_stride != b_stride){ + //De-stride A's regions + for(const auto& r : a.regions){ + fenix_assert(r.reps != MAX); + for(int i = 0; i <= r.reps; i++) + regions.insert(r.get_rep(i)); + } + } else { + regions = a.regions; + } + + for(const auto& br : b.regions){ + const auto [bstart, bend] = br.range(); + std::set adding = {br}; + for(const auto& r : regions){ + const auto [rstart, rend] = r.range(); + if(rstart > bend) break; + if(rend < bstart) continue; + for(auto it = adding.begin(); it != adding.end();){ + auto valid = *it - r; + if(valid.size() == 1 && valid.count(*it)){ + it++; + } else { + it = adding.erase(it); + adding.merge(valid); + } + } + } + regions.merge(adding); + } + + //Final attempt to simplify all regions + merge_regions(); +} + +void DataSubset::merge_regions() { + auto check = regions.begin(); + while(check != regions.end()){ + auto crange = check->range(); + + bool erase = false; + for(auto i = std::next(check); i != regions.end(); i++){ + if(crange.second < i->start-1) break; + + size_t merge_idx = -1; + size_t merge_reps = -1; + + size_t matching_end = i->start-1; + size_t matching_start = i->end+1; + if((matching_end - check->end)%check->stride == 0){ + merge_idx = (matching_end-check->end)/check->stride; + merge_reps = std::min(check->reps-merge_idx, i->reps); + + //Add the merged region + regions.insert(DataRegion( + {check->start+merge_idx*check->stride, i->end}, + merge_reps, check->stride + )); + } else if((matching_start - check->start)%check->stride == 0){ + merge_idx = (matching_start-check->start)/check->stride; + merge_reps = std::min(check->reps-merge_idx, i->reps); + + //Add the merged region + regions.insert(DataRegion( + {i->start, check->end+merge_idx*check->stride}, + merge_reps, check->stride + )); + } + + if(merge_idx != -1){ + erase = true; + + //Add any blocks before the merge + if(merge_idx > 0){ + regions.insert(DataRegion( + {check->start, check->end}, merge_idx-1, check->stride + )); + } + //Add any blocks after the merge + if(merge_idx+merge_reps < check->reps){ + size_t n_pre = merge_idx+merge_reps+1; + size_t offset = n_pre*check->stride; + regions.insert(DataRegion( + {check->start+offset, 
check->end+offset}, + check->reps-n_pre, check->stride + )); + } else if(merge_reps < i->reps){ + size_t n_pre = merge_reps+1; + size_t offset = n_pre*i->stride; + regions.insert(DataRegion( + {i->start+offset, i->end+offset}, + i->reps-n_pre, i->stride + )); + } + + regions.erase(i); + break; + } + } + + auto it = check++; + if(erase){ + regions.erase(it); + } + } +} + +DataSubset::DataSubset(const DataBuffer& buf){ + fenix_assert(buf.size()%sizeof(DataRegion) == 0); + + size_t n_regions = buf.size()/sizeof(DataRegion); + if(n_regions == 0) return; + + DataRegion* r = (DataRegion*) buf.data(); + for(int i = 0; i < n_regions; i++){ + regions.insert(*(r++)); + } +} + +void DataSubset::serialize(DataBuffer& buf) const { + buf.reset(regions.size()*sizeof(DataRegion)); + DataRegion* r = (DataRegion*) buf.data(); + for(const auto& region : regions){ + *(r++) = region; + } +} + +DataSubset DataSubset::operator+(const DataSubset& other) const { + return DataSubset(*this, other); +} + +DataSubset DataSubset::operator+(const Fenix_Data_subset& other) const { + return *this + *(DataSubset*)other.impl; +} + +DataSubset& DataSubset::operator+=(const DataSubset& other) { + *this = *this + other; + return *this; +} + +DataSubset& DataSubset::operator+=(const Fenix_Data_subset& other) { + return *this += *(DataSubset*)other.impl; +} + +DataSubset DataSubset::operator-(const DataSubset& other) const { + if(empty() || other.empty()) return *this; + + size_t a_stride = 0, b_stride = 0; + for(const auto& r : regions) if(r.reps) a_stride = r.stride; + for(const auto& r : other.regions) if(r.reps) b_stride = r.stride; + + fenix_assert(!( + a_stride && b_stride && a_stride != b_stride && + end() == MAX && other.end() == MAX + )); + + std::set remaining; + if(a_stride && b_stride && a_stride != b_stride){ + size_t b_end = other.end(); + + //Insert individual repetitions possibly overlapping regions + for(const auto& b : BlockIter(bounded_regions(0, b_end))){ + remaining.insert(b); + } + if(b_end != MAX){ + //Leave non-overlapping regions strided + auto br = bounded_regions(b_end+1, MAX); + for(const auto& b : br){ + remaining.insert(b); + } + } + } else { + remaining = regions; + } + + for(const auto& b_r : other.regions){ + auto next = remaining.begin(); + while(next != remaining.end()){ + auto it = next++; + + if(b_r.start > it->range().second) continue; + if(it->start > b_r.range().second) break; + + auto r = *it; + remaining.erase(it); + remaining.merge(r - b_r); + } + } + + DataSubset c; + c.regions = std::move(remaining); + c.merge_regions(); + return c; +} + +bool DataSubset::operator==(const DataSubset& other) const { + //Making checks in approximate order of cost + if(start() != other.start()) return false; + if(regions.empty() != other.regions.empty()) return false; + if(regions == other.regions) return true; + if(end() != other.end()) return false; + + size_t a_stride = 0, b_stride = 0; + for(const auto& r : regions) if(r.reps) a_stride = r.stride; + for(const auto& r : other.regions) if(r.reps) b_stride = r.stride; + + //We won't try comparing infinite regions of different strides. 
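+  //(Note: a conservative check. Both subsets are already known to share the
+  // same start and end at this point, so end() == MAX means both are
+  // unbounded; with differing strides an element-wise comparison would never
+  // terminate, so such subsets are simply reported as unequal.)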
+ if(a_stride && b_stride && a_stride != b_stride && end() == MAX) + return false; + + return (*this - other).empty() && (other - *this).empty(); +} + +bool DataSubset::operator!=(const DataSubset& other) const { + return !(*this == other); +} + +bool DataSubset::empty() const { + return regions.empty(); +} + +std::pair DataSubset::range() const { + return {start(), end()}; +} + +size_t DataSubset::start() const { + if(regions.empty()) return -1; + return regions.begin()->start; +} + +size_t DataSubset::end() const { + if(empty()) return -1; + + size_t ret = 0; + for(const auto& r : regions){ + ret = std::max(ret, r.range().second); + } + return ret; +} + +std::set DataSubset::bounded_regions(size_t max_idx) const { + return bounded_regions(0, max_idx); +} + +std::set DataSubset::bounded_regions( + size_t start, size_t end +) const { + std::set ret; + DataRegion bounds({start, end}); + for(const auto& r : regions){ + if(r.start > end) break; + if(r.range().second < start) continue; + ret.merge(r & bounds); + } + return ret; +} + +size_t DataSubset::count(size_t max_index) const { + size_t ret = 0; + for(const auto& r : bounded_regions(max_index)){ + size_t c = r.count(); + if(c == MAX) return 0; + ret+= c; + } + return ret; +} + +size_t DataSubset::max_count() const { + return end()+1; +} + +void DataSubset::serialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst +) const { + if(regions.empty()){ + dst.resize(0); + return; + } + fenix_assert(src.size()%elm_size == 0); + const size_t max_elm = src.size()/elm_size-1; + + dst.reset(count(max_elm) * elm_size); + char* ptr = dst.data(); + + for(const auto b : BlockIter(bounded_regions(max_elm))){ + size_t start = b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; + + fenix_assert((ptr+len)-dst.data() <= dst.size()); + fenix_assert(start+len <= src.size()); + + memcpy(ptr, src.data()+start, len); + ptr += len; + } + + fenix_assert(ptr == dst.data()+dst.size()); +} + +void DataSubset::deserialize_data( + size_t elm_size, const DataBuffer& src, DataBuffer& dst +) const { + if(regions.empty()) return; + fenix_assert(dst.size()%elm_size==0); + + size_t max_elm = dst.size()/elm_size - 1; + if(max_elm == 0){ + max_elm = end(); + fenix_assert(max_elm != MAX); + dst.resize((max_elm+1)*elm_size); + } + + fenix_assert(src.size() == count(max_elm)*elm_size); + const char* ptr = src.data(); + + for(const auto& b : BlockIter(bounded_regions(max_elm))){ + size_t start = b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; + + fenix_assert((ptr+len)-src.data() <= src.size()); + fenix_assert(start+len <= dst.size()); + + memcpy(dst.data()+start, ptr, len); + ptr += len; + } + + fenix_assert(ptr == src.data()+src.size()); +} + +void DataSubset::copy_data( + const size_t elm_size, const size_t src_len, const char* src, DataBuffer& dst +) const { + if(regions.empty()) return; + fenix_assert(src_len != 0 || end() != MAX, + "must specify either a maximum element count or provide a limited-bounds data subset"); + + size_t max_elm = src_len ? 
src_len-1 : end(); + if(dst.size() < (max_elm+1)*elm_size) dst.resize((max_elm+1)*elm_size); + + for(const auto& b : BlockIter(bounded_regions(max_elm))){ + size_t start = b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; + + fenix_assert(src_len == 0 || (start+len)/elm_size <= src_len); + fenix_assert(start+len <= dst.size()); + + memcpy(dst.data()+start, src+start, len); + } +} + +void DataSubset::copy_data( + const size_t elm_size, const DataBuffer& src, const size_t dst_len, char* dst +) const { + if(regions.empty()) return; + fenix_assert(dst_len != 0 || end() != MAX, + "must specify either a maximum element count or provide a limited-bounds data subset"); + + size_t max_elm = std::min(dst_len ? dst_len-1 : end(), src.size()); + + for(const auto& b : BlockIter(bounded_regions(max_elm))){ + size_t start = b.start*elm_size; + size_t len = (b.end-b.start+1)*elm_size; + + fenix_assert(dst_len == 0 || (start+len)/elm_size <= dst_len); + fenix_assert(start+len <= src.size()); + + memcpy(dst+start, src.data()+start, len); + } +} + +bool DataSubset::includes(size_t idx) const { + return !bounded_regions(idx, idx).empty(); +} + +bool DataSubset::includes_all(size_t end) const { + std::set remaining = {DataRegion({0, end})}; + + for(const auto& r : regions){ + if(r.start > end) break; + + auto next = remaining.begin(); + while(next != remaining.end()){ + auto it = next++; + auto rem = *it; + + remaining.erase(it); + remaining.merge(rem - r); + } + } + + return remaining.empty(); +} + +std::string DataSubset::str() const { + std::string ret = "{"; + for(const auto& r : regions) ret += r.str() + ", "; + if(!empty()){ + ret.pop_back(); + ret.pop_back(); + } + ret += "}"; + return ret; +} + +} // namespace Fenix + +using namespace Fenix; + +int __fenix_data_subset_create( + int num_blocks, int start, int end, int stride, Fenix_Data_subset *subset +) { + subset->impl = new DataSubset({start, end}, num_blocks, stride); + return FENIX_SUCCESS; +} + +int __fenix_data_subset_createv( + int num_blocks, int *starts, int *ends, Fenix_Data_subset *subset +) { + fenix_assert(num_blocks > 0, "num_blocks (%d) must be positive", num_blocks); + + std::vector> bounds; + bounds.reserve(num_blocks); + for(int i = 0; i < num_blocks; i++) bounds.push_back({starts[i], ends[i]}); + + subset->impl = new DataSubset(bounds); + + return FENIX_SUCCESS; +} + +int __fenix_data_subset_free( Fenix_Data_subset *subset ) { + delete (DataSubset*) subset->impl; + return FENIX_SUCCESS; +} diff --git a/src/globals.c b/src/fenix_ext.cpp similarity index 92% rename from src/globals.c rename to src/fenix_ext.cpp index e812a08..a11507c 100644 --- a/src/globals.c +++ b/src/fenix_ext.cpp @@ -12,7 +12,7 @@ // // // -// Copyright (C) 2018 Rutgers University and Sandia Corporation +// Copyright (C) 2016 Rutgers University and Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. @@ -44,8 +44,8 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar -// Michael Heroux, and Matthew Whitlock +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock // // Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and // Marc Gamell (mgamell@cac.rutgers.edu) @@ -54,8 +54,6 @@ //@HEADER */ -#include "fenix_ext.h" +#include "fenix_ext.hpp" -fenix_t fenix = { - .fenix_init_flag = 0 -}; +Fenix::fenix_t fenix; diff --git a/src/fenix_opt.c b/src/fenix_opt.cpp similarity index 97% rename from src/fenix_opt.c rename to src/fenix_opt.cpp index f6f6fac..5a44298 100644 --- a/src/fenix_opt.c +++ b/src/fenix_opt.cpp @@ -56,9 +56,9 @@ #include #include -#include "fenix_opt.h" -#include "fenix_util.h" -#include "fenix_ext.h" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" +#include "fenix_ext.hpp" #define DEBUG 1 diff --git a/src/fenix_process_recovery.c b/src/fenix_process_recovery.cpp similarity index 67% rename from src/fenix_process_recovery.c rename to src/fenix_process_recovery.cpp index 6a65820..b563b49 100644 --- a/src/fenix_process_recovery.c +++ b/src/fenix_process_recovery.cpp @@ -56,113 +56,73 @@ #include -#include "fenix_ext.h" -#include "fenix_comm_list.h" -#include "fenix_process_recovery_global.h" -#include "fenix_process_recovery.h" -#include "fenix_data_group.h" -#include "fenix_data_recovery.h" -#include "fenix_opt.h" -#include "fenix_util.h" +#include "fenix_ext.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_data_group.hpp" +#include "fenix_data_recovery.hpp" +#include "fenix_opt.hpp" +#include "fenix_util.hpp" #include #include #include -int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, char ***argv, - int spare_ranks, - int spawn, - MPI_Info info, int *error, jmp_buf *jump_environment) -{ - - int ret; - *role = fenix.role; - *error = 0; +using namespace Fenix; +using namespace Fenix::Data; + +int __fenix_preinit( + int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, char ***argv, + int spare_ranks, int spawn, MPI_Info info, int *error, jmp_buf *jump_env +) { + Args::FenixInitArgs args; + args.role = role; + args.in_comm = comm; + args.out_comm = new_comm; + args.argc = argc; + args.argv = argv; + args.spares = spare_ranks; + args.spawn = spawn; + args.err = error; + if(info != MPI_INFO_NULL){ + char value[MPI_MAX_INFO_VAL + 1]; + int vallen = MPI_MAX_INFO_VAL; + int found; - fenix.user_world = new_comm; + MPI_Info_get(info, "FENIX_RESUME_MODE", vallen, value, &found); + if(found) args.resume_mode = get_resume_mode(value); + else args.resume_mode = JUMP; + + MPI_Info_get(info, "FENIX_UNHANDLED_MODE", vallen, value, &found); + if(found) args.unhandled_mode = get_unhandled_mode(value); + } else { + args.resume_mode = JUMP; + } + return fenix_preinit(args, jump_env); +} - MPI_Comm_create_errhandler(__fenix_test_MPI, &fenix.mpi_errhandler); +int fenix_preinit(const Args::FenixInitArgs& args, jmp_buf* jump_env){ + fenix.world = (MPI_Comm *)malloc(sizeof(MPI_Comm)); + MPI_Comm_dup(args.in_comm, fenix.world); - fenix.world = malloc(sizeof(MPI_Comm)); - MPI_Comm_dup(comm, fenix.world); + MPI_Comm_create_errhandler(__fenix_test_MPI, &fenix.mpi_errhandler); PMPI_Comm_set_errhandler(*fenix.world, fenix.mpi_errhandler); - fenix.finalized = 0; - fenix.spare_ranks = spare_ranks; - fenix.spawn_policy = spawn; - fenix.recover_environment = jump_environment; - fenix.role = FENIX_ROLE_INITIAL_RANK; - fenix.fail_world_size = 0; - fenix.ignore_errs = 0; - fenix.resume_mode = __FENIX_RESUME_AT_INIT; - fenix.repair_result = 0; - fenix.ret_role = role; - fenix.ret_error = error; - - fenix.options.verbose = -1; - // __fenix_init_opt(*argc, *argv); - - // For request tracking, make sure we can save at least an integer - // 
in MPI_Request - if(sizeof(MPI_Request) < sizeof(int)) { - fprintf(stderr, "FENIX ERROR: __fenix_preinit: sizeof(MPI_Request) < sizeof(int)!\n"); - MPI_Abort(comm, -1); - } + fenix.user_world = args.out_comm; + fenix.spare_ranks = args.spares; + fenix.spawn_policy = args.spawn; + fenix.recover_environment = jump_env; + fenix.resume_mode = args.resume_mode; + fenix.callback_exception_mode = args.callback_exception_mode; + fenix.unhandled_mode = args.unhandled_mode; + fenix.ret_role = args.role ? args.role : &fenix.role; + fenix.ret_error = args.err ? args.err : &fenix.repair_result; + *fenix.ret_role = fenix.role; + *fenix.ret_error = FENIX_SUCCESS; MPI_Op_create((MPI_User_function *) __fenix_ranks_agree, 1, &fenix.agree_op); - /* Check the values in info */ - if (info != MPI_INFO_NULL) { - char value[MPI_MAX_INFO_VAL + 1]; - int vallen = MPI_MAX_INFO_VAL; - int flag; - - MPI_Info_get(info, "FENIX_RESUME_MODE", vallen, value, &flag); - if (flag == 1) { - if (strcmp(value, "Fenix_init") == 0) { - fenix.resume_mode = __FENIX_RESUME_AT_INIT; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, value: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } - } else if (strcmp(value, "NO_JUMP") == 0) { - fenix.resume_mode = __FENIX_RESUME_NO_JUMP; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, value: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } - - } else { - /* No support. Setting it to Fenix_init */ - fenix.resume_mode = __FENIX_RESUME_AT_INIT; - } - } - - - MPI_Info_get(info, "FENIX_UNHANDLED_MODE", vallen, value, &flag); - if (flag == 1) { - if (strcmp(value, "SILENT") == 0) { - fenix.print_unhandled = 0; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, UNHANDLED_MODE: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } - } else if (strcmp(value, "NO_JUMP") == 0) { - fenix.print_unhandled = 1; - if (fenix.options.verbose == 0) { - verbose_print("rank: %d, role: %d, UNHANDLED_MODE: %s\n", - __fenix_get_current_rank(*fenix.world), fenix.role, value); - } - - } else { - /* No support. Setting it to silent */ - fenix.print_unhandled = 0; - } - } - } - - if (fenix.spare_ranks >= __fenix_get_world_size(comm)) { + if (fenix.spare_ranks >= __fenix_get_world_size(*fenix.world)) { debug_print("Fenix: <%d> spare ranks requested are unavailable\n", fenix.spare_ranks); } @@ -170,50 +130,43 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha fenix.data_recovery = __fenix_data_recovery_init(); /*****************************************************/ - /* Note: fenix.new_world is only valid for the */ + /* Note: fenix.new_world is only valid for the */ /* active MPI ranks. Spare ranks do not */ /* allocate any communicator content with this.*/ /* Any MPI calls in spare ranks with new_world */ /* trigger an abort. 
*/ /*****************************************************/ - ret = 1; - while (ret) { - ret = __fenix_create_new_world(); - if (ret) { - // just_repair_process(); - } - } + //Try to create new_world until success + while (__fenix_create_new_world()); if ( __fenix_spare_rank() != 1) { fenix.num_inital_ranks = __fenix_get_world_size(fenix.new_world); if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", __fenix_get_current_rank(*fenix.world), fenix.role, - fenix.num_inital_ranks); + fenix.num_inital_ranks); } } else { - fenix.num_inital_ranks = spare_ranks; + fenix.num_inital_ranks = fenix.spare_ranks; if (fenix.options.verbose == 0) { verbose_print("rank: %d, role: %d, number_initial_ranks: %d\n", __fenix_get_current_rank(*fenix.world), fenix.role, - fenix.num_inital_ranks); + fenix.num_inital_ranks); } } - fenix.num_survivor_ranks = 0; - fenix.num_recovered_ranks = 0; + fenix.fenix_init_flag = true; while ( __fenix_spare_rank() == 1) { int a; - int myrank; MPI_Status mpi_status; - fenix.ignore_errs = 1; - ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *fenix.world, + fenix.ignore_errs = true; + int ret = PMPI_Recv(&a, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *fenix.world, &mpi_status); // listen for a failure - fenix.ignore_errs = 0; + fenix.ignore_errs = false; if (ret == MPI_SUCCESS) { if (fenix.options.verbose == 0) { verbose_print("Finalize the program; rank: %d, role: %d\n", @@ -234,11 +187,33 @@ int __fenix_preinit(int *role, MPI_Comm comm, MPI_Comm *new_comm, int *argc, cha if(fenix.role != FENIX_ROLE_RECOVERED_RANK) MPI_Comm_dup(fenix.new_world, fenix.user_world); - fenix.user_world_exists = 1; + fenix.user_world_exists = true; return fenix.role; } +Fenix_Resume_mode get_resume_mode(const std::string_view& name){ + if (name == "JUMP") { + return Fenix::JUMP; + } else if (name == "RETURN") { + return Fenix::RETURN; + } else if (name == "THROW") { + return Fenix::THROW; + } + fatal_print("Unsupported FENIX_RESUME_MODE %s", name.data()); +} + +Fenix_Unhandled_mode get_unhandled_mode(const std::string_view& name){ + if (name == "SILENT") { + return Fenix::SILENT; + } else if (name == "PRINT") { + return Fenix::PRINT; + } else if (name == "ABORT") { + return Fenix::ABORT; + } + fatal_print("Unsupported FENIX_UNHANDLED_MODE %s", name.data()); +} + int __fenix_spare_rank_within(MPI_Comm refcomm) { int result = -1; @@ -384,121 +359,111 @@ int __fenix_repair_ranks() rt_code = FENIX_WARNING_SPARE_RANKS_DEPLETED; - if (fenix.spare_ranks != 0) { - - /***************************************/ - /* Fill the ranks in increasing order */ - /***************************************/ + /***************************************/ + /* Fill the ranks in increasing order */ + /***************************************/ - int active_ranks; + int active_ranks; - survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); + survivor_world = (int *) s_malloc(survivor_world_size * sizeof(int)); - ret = PMPI_Allgather(¤t_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, - world_without_failures); + ret = PMPI_Allgather(¤t_rank, 1, MPI_INT, survivor_world, 1, MPI_INT, + world_without_failures); - if (fenix.options.verbose == 2) { - int index; - for (index = 0; index < survivor_world_size; index++) { - verbose_print("current_rank: %d, role: %d, survivor_world[%d]: %d\n", - current_rank, fenix.role, index, - survivor_world[index]); - } + if (fenix.options.verbose == 2) { + int index; + for (index = 0; index < survivor_world_size; index++) { + 
verbose_print("current_rank: %d, role: %d, survivor_world[%d]: %d\n", + current_rank, fenix.role, index, + survivor_world[index]); } + } - //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } - if (ret != MPI_SUCCESS) { - repair_success = 0; - if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); - } - MPI_Comm_free(&world_without_failures); - free(survivor_world); - goto END_LOOP; + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allgather. repair_ranks\n"); } + if (ret != MPI_SUCCESS) { + repair_success = 0; + if (ret == MPI_ERR_PROC_FAILED) { + MPIX_Comm_revoke(world_without_failures); } + MPI_Comm_free(&world_without_failures); + free(survivor_world); + goto END_LOOP; + } - survived_flag = 0; - if (fenix.role == FENIX_ROLE_SURVIVOR_RANK) { - survived_flag = 1; - } + survived_flag = 0; + if (fenix.role == FENIX_ROLE_SURVIVOR_RANK) { + survived_flag = 1; + } - ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, - MPI_INT, MPI_SUM, world_without_failures); + ret = PMPI_Allreduce(&survived_flag, &fenix.num_survivor_ranks, 1, + MPI_INT, MPI_SUM, world_without_failures); - //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. repair_ranks\n"); } - if (ret != MPI_SUCCESS) { - repair_success = 0; - if (ret == MPI_ERR_PROC_FAILED) { - MPIX_Comm_revoke(world_without_failures); - } - MPI_Comm_free(&world_without_failures); - free(survivor_world); - goto END_LOOP; + //if (ret != MPI_SUCCESS) { debug_print("MPI_Allreduce. repair_ranks\n"); } + if (ret != MPI_SUCCESS) { + repair_success = 0; + if (ret == MPI_ERR_PROC_FAILED) { + MPIX_Comm_revoke(world_without_failures); } + MPI_Comm_free(&world_without_failures); + free(survivor_world); + goto END_LOOP; + } - fenix.num_inital_ranks = 0; + fenix.num_inital_ranks = 0; - /* recovered ranks must be the number of spare ranks */ - fenix.num_recovered_ranks = fenix.fail_world_size; + /* recovered ranks must be the number of spare ranks */ + fenix.num_recovered_ranks = fenix.fail_world_size; - if (fenix.options.verbose == 2) { - verbose_print("current_rank: %d, role: %d, recovered_ranks: %d\n", - current_rank, fenix.role, - fenix.num_recovered_ranks); - } - - if(fenix.role != FENIX_ROLE_INITIAL_RANK){ - free(fenix.fail_world); - } - fenix.fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, - fenix.fail_world_size); + if (fenix.options.verbose == 2) { + verbose_print("current_rank: %d, role: %d, recovered_ranks: %d\n", + current_rank, fenix.role, + fenix.num_recovered_ranks); + } - if (fenix.options.verbose == 2) { - int index; - for (index = 0; index < fenix.fail_world_size; index++) { - verbose_print("fail_world[%d]: %d\n", index, fenix.fail_world[index]); - } + if(fenix.role != FENIX_ROLE_INITIAL_RANK){ + free(fenix.fail_world); + } + fenix.fail_world = __fenix_get_fail_ranks(survivor_world, survivor_world_size, + fenix.fail_world_size); + + if (fenix.options.verbose == 2) { + int index; + for (index = 0; index < fenix.fail_world_size; index++) { + verbose_print("fail_world[%d]: %d\n", index, fenix.fail_world[index]); } + } - free(survivor_world); + free(survivor_world); - active_ranks = world_size - fenix.spare_ranks; + active_ranks = world_size - fenix.spare_ranks; - if (fenix.options.verbose == 2) { - verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", - current_rank, fenix.role, - active_ranks); - } + if (fenix.options.verbose == 2) { + verbose_print("current_rank: %d, role: %d, active_ranks: %d\n", + current_rank, fenix.role, + active_ranks); + } - /* Assign 
new rank for reordering */ - if (current_rank >= active_ranks) { // reorder ranks - int rank_offset = ((world_size - 1) - current_rank); - - for(int fail_i = 0; fail_i < fenix.fail_world_size; fail_i++){ - if(fenix.fail_world[fail_i] > current_rank) rank_offset--; - } + /* Assign new rank for reordering */ + if (current_rank >= active_ranks) { // reorder ranks + int rank_offset = ((world_size - 1) - current_rank); - if (rank_offset < fenix.fail_world_size) { - if (fenix.options.verbose == 11) { - verbose_print("reorder ranks; current_rank: %d -> new_rank: %d\n", - current_rank, fenix.fail_world[rank_offset]); - } - current_rank = fenix.fail_world[rank_offset]; - } + for(int fail_i = 0; fail_i < fenix.fail_world_size; fail_i++){ + if(fenix.fail_world[fail_i] > current_rank) rank_offset--; } - /************************************/ - /* Update the number of spare ranks */ - /************************************/ - fenix.spare_ranks = 0; - - //debug_print("not enough spare ranks to repair rank failures. repair_ranks\n"); + if (rank_offset < fenix.fail_world_size) { + if (fenix.options.verbose == 11) { + verbose_print("reorder ranks; current_rank: %d -> new_rank: %d\n", + current_rank, fenix.fail_world[rank_offset]); + } + current_rank = fenix.fail_world[rank_offset]; + } } - /****************************************************************/ - /* No rank reordering is required if no spare rank is available */ - /****************************************************************/ - + /************************************/ + /* Update the number of spare ranks */ + /************************************/ + fenix.spare_ranks = 0; } } else { @@ -659,7 +624,7 @@ int* __fenix_get_fail_ranks(int *survivor_world, int survivor_world_size, int fa qsort(survivor_world, survivor_world_size, sizeof(int), __fenix_comparator); int failed_pos = 0; - int *fail_ranks = calloc(fail_world_size, sizeof(int)); + int *fail_ranks = (int *)calloc(fail_world_size, sizeof(int)); int i; for (i = 0; i < survivor_world_size + fail_world_size; i++) { @@ -678,13 +643,10 @@ int __fenix_spare_rank(){ return __fenix_spare_rank_within(*fenix.world); } -void __fenix_postinit(int *error) +void __fenix_postinit() { - - //if (fenix.options.verbose == 9) { - // verbose_print(" postinit: current_rank: %d, role: %d\n", __fenix_get_current_rank(fenix.new_world), - // fenix.role); - //} + *fenix.ret_role = fenix.role; + *fenix.ret_error = fenix.repair_result; if(fenix.new_world_exists){ //Set up dummy irecv to use for checking for failures. @@ -692,20 +654,10 @@ void __fenix_postinit(int *error) 34095347, fenix.new_world, &fenix.check_failures_req); } - if (fenix.repair_result != 0) { - *error = fenix.repair_result; - } - fenix.fenix_init_flag = 1; - -#if 0 - if (fenix.role != FENIX_ROLE_INITIAL_RANK) { - init_data_recovery(); + if(fenix.role != FENIX_ROLE_INITIAL_RANK) { + __fenix_callback_invoke_all(); } -#endif - if (fenix.role == FENIX_ROLE_SURVIVOR_RANK) { - __fenix_callback_invoke_all(*error); - } if (fenix.options.verbose == 9) { verbose_print("After barrier. current_rank: %d, role: %d\n", __fenix_get_current_rank(fenix.new_world), fenix.role); @@ -744,7 +696,7 @@ void __fenix_finalize() int last_spare_rank = __fenix_get_world_size(*fenix.world) - 1; //If we've reached here, we will finalized regardless of further errors. 
- fenix.ignore_errs = 1; + fenix.ignore_errs = true; while(!fenix.finalized){ int user_rank = __fenix_get_current_rank(*fenix.user_world); @@ -767,7 +719,7 @@ void __fenix_finalize() } else { //If rank 0 did contribute, we know sends made it, and regardless //of any other failures we finalize. - fenix.finalized = 1; + fenix.finalized = true; } } @@ -787,18 +739,16 @@ void __fenix_finalize() free(fenix.fail_world); } - /* Free Callbacks */ - __fenix_callback_destroy( fenix.callback_list ); - /* Free data recovery interface */ __fenix_data_recovery_destroy( fenix.data_recovery ); - fenix.fenix_init_flag = 0; + /* Free up any C++ data structures, reset default variables */ + fenix = {}; } void __fenix_finalize_spare() { - fenix.fenix_init_flag = 0; + fenix.fenix_init_flag = false; int unused; MPI_Request agree_req, recv_req = MPI_REQUEST_NULL; @@ -825,13 +775,11 @@ void __fenix_finalize_spare() MPI_Comm_set_errhandler(*fenix.world, MPI_ERRORS_ARE_FATAL); MPI_Comm_free(fenix.world); - /* Free callbacks */ - __fenix_callback_destroy( fenix.callback_list ); - /* Free data recovery interface */ __fenix_data_recovery_destroy( fenix.data_recovery ); - fenix.fenix_init_flag = 0; + /* Free up any C++ data structures, reset default variables */ + fenix = {}; /* Future version do not close MPI. Jump to where Fenix_Finalize is called. */ MPI_Finalize(); @@ -842,55 +790,61 @@ void __fenix_test_MPI(MPI_Comm *pcomm, int *pret, ...) { int ret_repair; int index; - int ret = *pret; + fenix.mpi_fail_code = *pret; + if(!fenix.fenix_init_flag || __fenix_spare_rank() == 1 || fenix.ignore_errs) { return; } - switch (ret) { - case MPI_ERR_PROC_FAILED_PENDING: - case MPI_ERR_PROC_FAILED: - MPIX_Comm_revoke(*fenix.world); - MPIX_Comm_revoke(fenix.new_world); - - if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); - - - __fenix_comm_list_destroy(); + switch (fenix.mpi_fail_code) { + case MPI_ERR_PROC_FAILED_PENDING: + case MPI_ERR_PROC_FAILED: + MPIX_Comm_revoke(*fenix.world); + MPIX_Comm_revoke(fenix.new_world); + if(fenix.user_world_exists) MPIX_Comm_revoke(*fenix.user_world); - fenix.repair_result = __fenix_repair_ranks(); - break; - case MPI_ERR_REVOKED: - __fenix_comm_list_destroy(); - - fenix.repair_result = __fenix_repair_ranks(); - break; - case MPI_ERR_INTERN: - printf("Fenix detected error: MPI_ERR_INTERN\n"); - default: - if(fenix.print_unhandled){ + fenix.repair_result = __fenix_repair_ranks(); + break; + case MPI_ERR_REVOKED: + fenix.repair_result = __fenix_repair_ranks(); + break; + default: int len; char errstr[MPI_MAX_ERROR_STRING]; - MPI_Error_string(ret, errstr, &len); - fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); - } - return; - break; + MPI_Error_string(fenix.mpi_fail_code, errstr, &len); + switch (fenix.unhandled_mode) { + case ABORT: + fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); + MPI_Abort(*fenix.world, 1); + break; + case PRINT: + fprintf(stderr, "UNHANDLED ERR: %s\n", errstr); + break; + case SILENT: + break; + default: + printf("Fenix internal error: Unknown unhandled mode %d\n", fenix.unhandled_mode); + assert(false); + break; + } + return; + break; } - fenix.role = FENIX_ROLE_SURVIVOR_RANK; + __fenix_postinit(); if(!fenix.finalized) { switch(fenix.resume_mode) { - case __FENIX_RESUME_AT_INIT: + case JUMP: longjmp(*fenix.recover_environment, 1); break; - case __FENIX_RESUME_NO_JUMP: - *(fenix.ret_role) = FENIX_ROLE_SURVIVOR_RANK; - __fenix_postinit(fenix.ret_error); + case RETURN: + break; + case THROW: + Fenix::throw_exception(); break; default: - printf("Fenix detected 
error: Unknown resume mode\n"); + printf("Fenix internal error: Unknown resume mode %d\n", fenix.resume_mode); assert(false); break; } diff --git a/src/fenix_util.c b/src/fenix_util.cpp similarity index 98% rename from src/fenix_util.c rename to src/fenix_util.cpp index b56d237..246ea3f 100644 --- a/src/fenix_util.c +++ b/src/fenix_util.cpp @@ -54,9 +54,9 @@ //@HEADER */ -#include "fenix_opt.h" -#include "fenix_process_recovery.h" -#include "fenix_util.h" +#include "fenix_opt.hpp" +#include "fenix_process_recovery.hpp" +#include "fenix_util.hpp" char* logname; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c4f2e92..1d59516 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,7 +1,8 @@ -add_subdirectory(subset_internal) -add_subdirectory(subset_merging) +add_subdirectory(subset) add_subdirectory(request_tracking) add_subdirectory(request_cancelled) add_subdirectory(no_jump) add_subdirectory(issend) add_subdirectory(failed_spares) +add_subdirectory(exception_throw) +add_subdirectory(storev) diff --git a/test/subset_internal/CMakeLists.txt b/test/exception_throw/CMakeLists.txt similarity index 58% rename from test/subset_internal/CMakeLists.txt rename to test/exception_throw/CMakeLists.txt index 4dcfc28..7cd5a58 100644 --- a/test/subset_internal/CMakeLists.txt +++ b/test/exception_throw/CMakeLists.txt @@ -7,7 +7,9 @@ # For more information, see the LICENSE file in the top Fenix # directory. # -add_executable(fenix_subset_internal_test fenix_subset_internal_test.c) -target_link_libraries(fenix_subset_internal_test fenix) -add_test(subset_internal fenix_subset_internal_test "100" "3" "5" "7" "10") +add_executable(fenix_exceptions fenix_exceptions.cpp) +target_link_libraries(fenix_exceptions fenix MPI::MPI_CXX) + +add_test(NAME exception_throw + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} fenix_exceptions ${MPIEXEC_POSTFLAGS}) diff --git a/include/fenix_opt.h b/test/exception_throw/fenix_exceptions.cpp similarity index 73% rename from include/fenix_opt.h rename to test/exception_throw/fenix_exceptions.cpp index b032b02..11d4019 100644 --- a/include/fenix_opt.h +++ b/test/exception_throw/fenix_exceptions.cpp @@ -44,7 +44,7 @@ // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, // Michael Heroux, and Matthew Whitlock // // Questions? Contact Keita Teranishi (knteran@sandia.gov) and @@ -54,34 +54,48 @@ //@HEADER */ -#ifndef __FENIX_OPT__ -#define __FENIX_OPT__ +#include +#include #include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include + +int main(int argc, char **argv) { + volatile int status = 0; + + MPI_Init(&argc, &argv); -#define debug_print(fmt, ...) \ - do { fprintf(stderr, "%s: %d: %s(): " fmt, __FILE__, \ - __LINE__, __func__, __VA_ARGS__); } while (0) + int fenix_role, error; + MPI_Comm res_comm; + MPI_Info info; + MPI_Info_create(&info); + MPI_Info_set(info, "FENIX_RESUME_MODE", "THROW"); + Fenix_Init(&fenix_role, MPI_COMM_WORLD, &res_comm, &argc, &argv, 0, 0, info, &error); -#define verbose_print(fmt, ...) 
\ - do { printf("%s(): " fmt, __func__, __VA_ARGS__); } while (0) + if(fenix_role == FENIX_ROLE_SURVIVOR_RANK){ + printf("FAILURE: longjmp instead of exception\n"); + status = 1; + } -typedef struct __fenix_debug_opt_t { - int verbose; -} fenix_debug_opt_t; + if (fenix_role == FENIX_ROLE_INITIAL_RANK) { + int rank; + MPI_Comm_rank(res_comm, &rank); + if(rank == 1) raise(SIGKILL); + try { + MPI_Barrier(res_comm); + printf("FAILURE: barrier finished without fault\n"); + status = 1; + } catch (Fenix::CommException e){ + printf("SUCCESS: caught CommException\n"); + } + } -void __fenix_init_opt(int argc, char **argv); + Fenix_Finalize(); + MPI_Finalize(); -#endif + return status; +} diff --git a/test/failed_spares/CMakeLists.txt b/test/failed_spares/CMakeLists.txt index 8fd95b3..46e4f4c 100644 --- a/test/failed_spares/CMakeLists.txt +++ b/test/failed_spares/CMakeLists.txt @@ -9,7 +9,7 @@ # add_executable(fenix_failed_spares fenix_failed_spares.c) -target_link_libraries(fenix_failed_spares fenix MPI::MPI_C) +target_link_libraries(fenix_failed_spares fenix MPI::MPI_CXX) add_test(NAME failed_spares COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} fenix_failed_spares ${MPIEXEC_POSTFLAGS} 3 1 3 4 ) diff --git a/test/failed_spares/fenix_failed_spares.c b/test/failed_spares/fenix_failed_spares.c index bea1dd7..6b202aa 100644 --- a/test/failed_spares/fenix_failed_spares.c +++ b/test/failed_spares/fenix_failed_spares.c @@ -61,6 +61,7 @@ #include #include #include +#include const int kKillID = 1; diff --git a/test/issend/CMakeLists.txt b/test/issend/CMakeLists.txt index f141d40..2b44c38 100644 --- a/test/issend/CMakeLists.txt +++ b/test/issend/CMakeLists.txt @@ -9,6 +9,6 @@ # add_executable(fenix_issend_test fenix_issend_test.c) -target_link_libraries(fenix_issend_test fenix MPI::MPI_C) +target_link_libraries(fenix_issend_test fenix MPI::MPI_CXX) add_test(NAME issend COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_issend_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/issend/fenix_issend_test.c b/test/issend/fenix_issend_test.c index 0159297..23f3d85 100644 --- a/test/issend/fenix_issend_test.c +++ b/test/issend/fenix_issend_test.c @@ -61,6 +61,7 @@ #include #include #include +#include const int kKillID = 1; @@ -86,7 +87,7 @@ int main(int argc, char **argv) { MPI_Info info; MPI_Info_create(&info); - MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); + MPI_Info_set(info, "FENIX_RESUME_MODE", "RETURN"); int fenix_status; int recovered = 0; diff --git a/test/message_replay/CMakeLists.txt b/test/message_replay/CMakeLists.txt new file mode 100644 index 0000000..f9f2c11 --- /dev/null +++ b/test/message_replay/CMakeLists.txt @@ -0,0 +1,14 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. 
+# + +add_executable(fenix_message_logging_test fenix_message_logging_test.cxx) +target_link_libraries(fenix_message_logging_test fenix MPI::MPI_CXX) + +add_test(NAME request_cancelled COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_message_logging_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/no_jump/CMakeLists.txt b/test/no_jump/CMakeLists.txt index dfc9311..250059f 100644 --- a/test/no_jump/CMakeLists.txt +++ b/test/no_jump/CMakeLists.txt @@ -9,6 +9,6 @@ # add_executable(fenix_no_jump_test fenix_no_jump_test.c) -target_link_libraries(fenix_no_jump_test fenix MPI::MPI_C) +target_link_libraries(fenix_no_jump_test fenix MPI::MPI_CXX) add_test(NAME no_jump COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_no_jump_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/no_jump/fenix_no_jump_test.c b/test/no_jump/fenix_no_jump_test.c index 82187a3..cf5d261 100644 --- a/test/no_jump/fenix_no_jump_test.c +++ b/test/no_jump/fenix_no_jump_test.c @@ -61,6 +61,7 @@ #include #include #include +#include const int kKillID = 1; @@ -86,7 +87,7 @@ int main(int argc, char **argv) { MPI_Info info; MPI_Info_create(&info); - MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP"); + MPI_Info_set(info, "FENIX_RESUME_MODE", "RETURN"); int fenix_status; int recovered = 0; diff --git a/test/request_cancelled/CMakeLists.txt b/test/request_cancelled/CMakeLists.txt index 97dd331..f20dee8 100644 --- a/test/request_cancelled/CMakeLists.txt +++ b/test/request_cancelled/CMakeLists.txt @@ -9,6 +9,6 @@ # add_executable(fenix_request_cancelled_test fenix_req_cancelled_test.c) -target_link_libraries(fenix_request_cancelled_test fenix MPI::MPI_C) +target_link_libraries(fenix_request_cancelled_test fenix MPI::MPI_CXX) add_test(NAME request_cancelled COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_request_cancelled_test ${MPIEXEC_POSTFLAGS} "1") diff --git a/test/request_cancelled/fenix_req_cancelled_test.c b/test/request_cancelled/fenix_req_cancelled_test.c index 3d7c89a..554f7f4 100644 --- a/test/request_cancelled/fenix_req_cancelled_test.c +++ b/test/request_cancelled/fenix_req_cancelled_test.c @@ -60,6 +60,7 @@ #include #include #include +#include const int kKillID = 1; diff --git a/test/request_tracking/CMakeLists.txt b/test/request_tracking/CMakeLists.txt index 8d008ed..694bc99 100644 --- a/test/request_tracking/CMakeLists.txt +++ b/test/request_tracking/CMakeLists.txt @@ -9,7 +9,7 @@ # add_executable(fenix_request_tracking_test fenix_request_tracking_test.c) -target_link_libraries(fenix_request_tracking_test fenix MPI::MPI_C) +target_link_libraries(fenix_request_tracking_test fenix MPI::MPI_CXX) add_test(NAME request_tracking COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} fenix_request_tracking_test ${MPIEXEC_POSTFLAGS}) diff --git a/test/request_tracking/fenix_request_tracking_test.c b/test/request_tracking/fenix_request_tracking_test.c index f279e01..ae0dc11 100644 --- a/test/request_tracking/fenix_request_tracking_test.c +++ b/test/request_tracking/fenix_request_tracking_test.c @@ -4,6 +4,7 @@ #include #include #include //for memcpy +#include #ifndef RTT_NO_FENIX #include diff --git a/test/subset_merging/CMakeLists.txt b/test/storev/CMakeLists.txt similarity index 63% rename from test/subset_merging/CMakeLists.txt rename to test/storev/CMakeLists.txt index 603686e..d87a3c9 100644 --- a/test/subset_merging/CMakeLists.txt +++ b/test/storev/CMakeLists.txt @@ -8,7 +8,8 @@ # directory. 
# -add_executable(fenix_subset_merging_test fenix_subset_merging_test.c) -target_link_libraries(fenix_subset_merging_test fenix) +add_executable(storev storev.cpp) +target_link_libraries(storev fenix MPI::MPI_CXX) -add_test(subset_merging fenix_subset_merging_test) +add_test(NAME storev + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} storev ${MPIEXEC_POSTFLAGS}) diff --git a/test/storev/storev.cpp b/test/storev/storev.cpp new file mode 100644 index 0000000..991741a --- /dev/null +++ b/test/storev/storev.cpp @@ -0,0 +1,201 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar, +// Michael Heroux, and Matthew Whitlock +// +// Questions? 
Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr int kKillID = 2; +constexpr int my_group = 0; +constexpr int my_member = 0; +constexpr int start_timestamp = 0; +constexpr int group_depth = 1; +int errflag; + +using Fenix::DataSubset; +using namespace Fenix::Data; + +int main(int argc, char **argv) { + MPI_Init(&argc, &argv); + + MPI_Comm res_comm; + Fenix::init({.out_comm = &res_comm, .spares = 1}); + + int num_ranks, rank; + MPI_Comm_size(res_comm, &num_ranks); + MPI_Comm_rank(res_comm, &rank); + + std::vector data; + + bool should_throw = Fenix_get_role() == FENIX_ROLE_RECOVERED_RANK; + while(true) try { + if(should_throw){ + should_throw = false; + Fenix::throw_exception(); + } + + //Initial work and commits + if(Fenix_get_role() == FENIX_ROLE_INITIAL_RANK){ + Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + Fenix_Data_member_create( + my_group, my_member, data.data(), FENIX_RESIZEABLE, MPI_INT + ); + + data.resize(100 + rank); + for(int& i : data) i = -1; + + + //Store the whole array first. We need to keep our buffer pointer updated + //since resizing an array can change it + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_storev(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + //Now commit a smaller portion with different data. + data.resize(50 + rank); + int val = 1; + for(int& i : data) i = val++; + + Fenix_Data_member_attr_set( + my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER, data.data(), + &errflag + ); + member_storev(my_group, my_member, {{0, data.size()-1}}); + Fenix_Data_commit_barrier(my_group, NULL); + + + if(rank == kKillID){ + fprintf(stderr, "Doing kill on node %d\n", rank); + raise(SIGTERM); + } + } + + Fenix_Finalize(); + break; + } catch (const Fenix::CommException& e) { + const Fenix::CommException* err = &e; + while(true) try { + //We've had a failure! Time to recover data. + fprintf(stderr, "Starting data recovery on rank %d\n", rank); + if(err->fenix_err != FENIX_SUCCESS){ + fprintf(stderr, "FAILURE on Fenix Init (%d). Exiting.\n", err->fenix_err); + exit(1); + } + + Fenix_Data_group_create( + my_group, res_comm, start_timestamp, group_depth, FENIX_DATA_POLICY_IMR, + NULL, &errflag + ); + + //Do a null restore to get information about the stored subset + DataSubset stored_subset; + int ret = member_restore( + my_group, my_member, nullptr, 0, FENIX_TIME_STAMP_MAX, stored_subset + ); + if(ret != FENIX_SUCCESS) { + fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, ret); + MPI_Abort(MPI_COMM_WORLD, 1); + } + + //Resize data to fit all stored data + data.resize(stored_subset.end()+1); + + //Set all data to a value that was never stored, just for testing + for(int& i : data) i = -2; + + //Now do an lrestore to get the recovered data. 
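+      //(The null restore above only recovered the bounds of the stored
+      // subset; this call fills the resized buffer with the stored values,
+      // and its result is checked indirectly by the verification loop below.)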
+ ret = member_lrestore( + my_group, my_member, data.data(), data.size(), FENIX_TIME_STAMP_MAX, + stored_subset + ); + + break; + } catch (const Fenix::CommException& nested){ + err = &nested; + } + } + + //Ensure data is correct after execution and recovery + bool successful = data.size() == 50 + rank; + if(!successful) printf("Rank %d expected data size 50, but got %d\n", rank, data.size()); + + for(int i = 0; i < data.size() && successful; i++){ + successful &= data[i] == i+1; + if(!successful) printf("Rank %d data[%d]=%d, but should be %d!\n", rank, i, data[i], i+1); + } + + if(successful){ + printf("Rank %d successfully recovered\n", rank); + } else { + printf("FAILURE on rank %d\n", rank); + } + + MPI_Finalize(); + return !successful; //return error status +} diff --git a/test/subset/CMakeLists.txt b/test/subset/CMakeLists.txt new file mode 100644 index 0000000..5029cfd --- /dev/null +++ b/test/subset/CMakeLists.txt @@ -0,0 +1,24 @@ +# +# This file is part of Fenix +# Copyright (c) 2016 Rutgers University and Sandia Corporation. +# This software is distributed under the BSD License. +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# For more information, see the LICENSE file in the top Fenix +# directory. +# +add_executable(fenix_subset_includes subset_includes.cpp) +target_link_libraries(fenix_subset_includes fenix) +add_test(subset_includes fenix_subset_includes) + +add_executable(fenix_subset_subtraction subset_subtraction.cpp) +target_link_libraries(fenix_subset_subtraction fenix) +add_test(subset_subtraction fenix_subset_subtraction) + +add_executable(fenix_subset_copy subset_copy.cpp) +target_link_libraries(fenix_subset_copy fenix) +add_test(subset_copy fenix_subset_copy) + +add_executable(fenix_subset_addition subset_addition.cpp) +target_link_libraries(fenix_subset_addition fenix) +add_test(subset_addition fenix_subset_addition) diff --git a/test/subset_internal/fenix_subset_internal_test.c b/test/subset/subset_addition.cpp similarity index 50% rename from test/subset_internal/fenix_subset_internal_test.c rename to test/subset/subset_addition.cpp index 5dab905..2491631 100644 --- a/test/subset_internal/fenix_subset_internal_test.c +++ b/test/subset/subset_addition.cpp @@ -56,7 +56,6 @@ */ #include -#include // Never called explicitly by the users #include #include #include @@ -64,95 +63,76 @@ #include #include -int _verify_subset( double *data, int num_blocks, int start_offset, int end_offset, int stride, - Fenix_Data_subset *subset_specifier ); +#include -int _verify_subset( double *data, int num_repeats, int start_offset, int end_offset, int stride, - Fenix_Data_subset *sp) -{ - int i, j; - int idx; - int block_size; - int flag = 0; - double accumulator =0.0; +#include "subset_common.hpp" - if( num_repeats != sp->num_repeats[0]+1 ) { - flag = 1; - printf("num_repeats set incorrectly."); - } - if( start_offset != sp->start_offsets[0] ) { - flag = 2; - printf("start_offset set incorrectly\n"); - } - if( end_offset != sp->end_offsets[0]){ - flag = 3; - printf("end_offset set incorrectly\n"); - } - if( sp->specifier != __FENIX_SUBSET_CREATE ) { - flag = 4; - printf("specifier set incorrectly\n"); - } - if(stride != sp->stride){ - flag = 5; - printf("stride set incorrectly\n"); +using namespace Fenix; + +bool test_addition(const DataSubset& a, const DataSubset& b){ + printf("Testing subsets a=%s, b=%s\n", a.str().c_str(), b.str().c_str()); + + const DataSubset c = a + b; + const DataSubset 
d = b + a; + + printf("c=a+b=%s\n", c.str().c_str()); + printf("d=b+a=%s\n", d.str().c_str()); + + if(c != d){ + printf("a+b != b+a\n"); + return false; } - /* Iterate over the loop to see if any memory error occurs*/ - idx = start_offset; - block_size = end_offset - start_offset; - for ( i = 0; i < num_repeats; i++ ) { - for( j = 0; j < block_size; j++ ) { - accumulator += data[idx+j]; + size_t start = std::min(a.start(), b.start()); + size_t end; + if(a.end() == -1 || b.end() == -1){ + end = start+1000; + } else { + end = std::max(a.end(), b.end()) + 10; + } + + for(int i = start; i <= end; i++){ + if(c.includes(i) != (a.includes(i) || b.includes(i))){ + if(c.includes(i)){ + printf("c=a+b incorrectly includes index %d not in a or b\n", i); + return false; + } else { + printf( + "c=a+b incorrectly excludes index %d in %s\n", i, + a.includes(i) ? b.includes(i) ? "both" : "a" : "b" + ); + return false; + } + } + if(d.includes(i) != (a.includes(i) || b.includes(i))){ + if(d.includes(i)){ + printf("d=b+a incorrectly includes index %d not in a or b\n", i); + return false; + } else { + printf( + "d=b+a incorrectly excludes index %d in %s\n", i, + a.includes(i) ? b.includes(i) ? "both" : "a" : "b" + ); + return false; + } } - idx += stride; } - return flag; + return true; } - int main(int argc, char **argv) { - Fenix_Data_subset subset_specifier; - int num_blocks; - int start_offset, end_offset, stride; - int space_size; - double *d_space; - - if (argc < 6) { - printf("Usage: %s <# blocks> \n", *argv); - exit(0); - } - - space_size = atoi(argv[1]); - num_blocks = atoi(argv[2]); - start_offset = atoi(argv[3]); - end_offset = atoi(argv[4]); - stride = atoi(argv[5]); + bool success = true; - if( space_size < (num_blocks * stride + start_offset) ) { - printf("Error: Array size is smaller than (the number of blocks x stride) + start_offset\n"); - printf("Aborting\n"); - exit(0); + auto subsets = get_subsets(); + for(const auto& a : subsets){ + for(const auto& b : subsets){ + success &= test_addition(a, b); + } } - if( start_offset > end_offset ) { - printf("Error: Start offset must be less than end_offset\n"); - printf("Aborting\n"); - exit(0); - } - - d_space = (double *)malloc(sizeof(double)*space_size); - Fenix_Data_subset_create(num_blocks, start_offset, end_offset, stride, &subset_specifier); - //data_subset_create( num_blocks, start_offset, end_offset, stride, &subset_specifier ); - // Verification - int err_code = _verify_subset( d_space, num_blocks, start_offset, end_offset, stride, &subset_specifier ); - // free_data_subset_fixed ( &subset_specifier); - free(d_space); - if( err_code == 0 ) { - printf("Passed\n"); - } - return err_code; + return success ? 0 : 1; } diff --git a/test/subset/subset_common.hpp b/test/subset/subset_common.hpp new file mode 100644 index 0000000..2076fc3 --- /dev/null +++ b/test/subset/subset_common.hpp @@ -0,0 +1,79 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// and Michael Heroux +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +#include + +//Returns a variety of subsets to perform tests on +static std::vector get_subsets(){ + using namespace Fenix; + std::vector ret; + ret.push_back(DataSubset()); + ret.push_back(DataSubset(10)); + ret.push_back(DataSubset(-1)); + ret.push_back(DataSubset({0, 10})); + ret.push_back(DataSubset({0, -1})); + ret.push_back(DataSubset({5, 10})); + ret.push_back(DataSubset({5, -1})); + ret.push_back(DataSubset({0, 4}, 2, 5)); + ret.push_back(DataSubset({0, 4}, 2, 6)); + ret.push_back(DataSubset({0, 4}, 10, 6)); + ret.push_back(DataSubset({0, 4}, 10, 10)); + return ret; +} diff --git a/test/subset/subset_copy.cpp b/test/subset/subset_copy.cpp new file mode 100644 index 0000000..2b7ae38 --- /dev/null +++ b/test/subset/subset_copy.cpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// and Michael Heroux +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "subset_common.hpp" + +using namespace Fenix; + +bool test_copy(const DataSubset& a){ + size_t count = a.max_count(); + if(count == 0) count = 1000; + + std::vector in, out; + in.resize(count); + out.resize(count); + + DataBuffer buf; + + for(int& i : in) i = 1; + for(int& i : out) i = 0; + + a.copy_data(sizeof(int), count, (char*)in.data(), buf); + a.copy_data(sizeof(int), buf, count, (char*)out.data()); + + for(int i = 0; i < count; i++){ + if(a.includes(i) && out[i] != 1){ + printf("Failed to transfer index %d\n", i); + return false; + } else if(!a.includes(i) && out[i] != 0){ + printf("Incorrectly transfered index %d\n", i); + return false; + } + } + + return true; +} + +int main(int argc, char **argv) +{ + bool success = true; + + auto subsets = get_subsets(); + for(const auto& a : subsets){ + success &= test_copy(a); + } + + return success ? 0 : 1; +} + + diff --git a/test/subset/subset_includes.cpp b/test/subset/subset_includes.cpp new file mode 100644 index 0000000..332967d --- /dev/null +++ b/test/subset/subset_includes.cpp @@ -0,0 +1,166 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS +// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar +// and Michael Heroux +// +// Questions? Contact Keita Teranishi (knteran@sandia.gov) and +// Marc Gamell (mgamell@cac.rutgers.edu) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +using namespace Fenix; + +bool test_unstrided(int start, int end){ + printf("Testing subset [%d, %d] \n", start, end); + std::cout.flush(); + + DataSubset s({start, end}); + for(int i = 0; i < start; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d] incorrectly includes %d\n", start, end, i); + return false; + } + } + + int test_end = end == -1 ? 
start+20 : end; + for(int i = start; i <= test_end; i++){ + if(!s.includes(i)){ + printf("Subset [%d, %d] incorrectly excludes %d\n", start, end, i); + return false; + } + } + + if(end != -1){ + for(int i = end+1; i <= test_end+20; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d] incorrectly includes %d\n", start, end, i); + return false; + } + } + } + + return true; +} + +bool test_strided(int start, int end, int count, int stride){ + printf("Testing subset [%d, %d]x%d stride %d \n", start, end, count, stride); + std::cout.flush(); + + DataSubset s({start, end}, count, stride); + + for(int i = 0; i < start; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly includes %d\n", start, end, count, stride, i); + return false; + } + } + for(int b = 0; b < count && b < 5; b++){ + int b_start = start+b*stride; + int b_end = end+b*stride; + for(int i = b_start; i <= b_end; i++){ + if(!s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly excludes %d\n", start, end, count, stride, i); + return false; + } + } + } + for(int b = 0; b < count-1 && b < 5; b++) { + int b_end = end+b*stride; + int next_b_start = start + (b+1)*stride; + for(int i = b_end+1; i < next_b_start; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly includes %d\n", start, end, count, stride, i); + return false; + } + } + } + + int range_end = end+(count-1)*stride; + for(int i = range_end+1; i < range_end+1+stride*2; i++){ + if(s.includes(i)){ + printf("Subset [%d, %d]x%d stride %d incorrectly includes %d\n", start, end, count, stride, i); + return false; + } + } + + return true; +} + +int main(int argc, char **argv) +{ + bool success = true; + + success &= test_unstrided(0, 10); + success &= test_unstrided(5, 10); + success &= test_unstrided(10, 10); + success &= test_unstrided(0, -1); + success &= test_unstrided(10, -1); + + success &= test_strided(0, 4, 2, 5); + success &= test_strided(0, 4, 2, 6); + success &= test_strided(0, 4, 10, 6); + success &= test_strided(0, 4, 10, 10); + + return success ? 0 : 1; +} + + diff --git a/test/subset/subset_subtraction.cpp b/test/subset/subset_subtraction.cpp new file mode 100644 index 0000000..c6c6cb9 --- /dev/null +++ b/test/subset/subset_subtraction.cpp @@ -0,0 +1,119 @@ +/* +//@HEADER +// ************************************************************************ +// +// +// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _| +// _| _| _|_| _| _| _| _| +// _|_|_| _|_|_| _| _| _| _| _| +// _| _| _| _|_| _| _| _| +// _| _|_|_|_| _| _| _|_|_| _| _| +// +// +// +// +// Copyright (C) 2016 Rutgers University and Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS
+// UNIVERISY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar
+// and Michael Heroux
+//
+// Questions? Contact Keita Teranishi (knteran@sandia.gov) and
+// Marc Gamell (mgamell@cac.rutgers.edu)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "subset_common.hpp"
+
+using namespace Fenix;
+
+bool test_subtraction(const DataSubset& a, const DataSubset& b){
+  const DataSubset c = a - b;
+
+  size_t start = std::min(a.start(), b.start());
+  size_t end;
+  if(a.end() == -1 || b.end() == -1){
+    end = start+1000;
+  } else {
+    end = std::max(a.end(), b.end()) + 10;
+  }
+
+  for(int i = start; i <= end; i++){
+    if(c.includes(i)){
+      if(!a.includes(i)){
+        printf("Result of a - b incorrectly includes index not in a\n");
+        return false;
+      }
+      if(b.includes(i)){
+        printf("Result of a - b incorrectly includes index in b\n");
+        return false;
+      }
+    }
+    if(!c.includes(i)){
+      if(a.includes(i) && !b.includes(i)){
+        printf("Result of a - b incorrectly excludes index in a but not b\n");
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+int main(int argc, char **argv)
+{
+  bool success = true;
+
+  auto subsets = get_subsets();
+  for(const auto& a : subsets){
+    for(const auto& b : subsets){
+      success &= test_subtraction(a, b);
+    }
+  }
+
+  return success ? 0 : 1;
+}
+
+
diff --git a/test/subset_merging/fenix_subset_merging_test.c b/test/subset_merging/fenix_subset_merging_test.c
deleted file mode 100644
index 8ed6cd6..0000000
--- a/test/subset_merging/fenix_subset_merging_test.c
+++ /dev/null
@@ -1,176 +0,0 @@
-#include
-#include
-#include
-void print_subset(Fenix_Data_subset *ss){
-  printf("\tnum_blocks:\t %d\n", ss->num_blocks);
-  printf("\tstride:\t\t %d\n", ss->stride);
-  printf("\tspecifier:\t %d\n", ss->specifier);
-  printf("\tstart_offsets:\t [");
-  for(int i = 0; i < ss->num_blocks; i++){
-    printf( (i==0) ? "%d" : ", %d", ss->start_offsets[i]);
-  }
-  printf("]\n");
-  printf("\tend_offsets:\t [");
-  for(int i = 0; i < ss->num_blocks; i++){
-    printf( (i==0) ? "%d" : ", %d", ss->end_offsets[i]);
-  }
-  printf("]\n");
-  printf("\tnum_repeats:\t [");
-  for(int i = 0; i < ss->num_blocks; i++){
-    printf( (i==0) ? "%d" : ", %d", ss->num_repeats[i]);
-  }
-  printf("]\n");
-}
-
-int test_subset_main(Fenix_Data_subset *ss, int num_blocks,
-    int *start_offsets, int *end_offsets, int *num_repeats){
-  if(ss->num_blocks != num_blocks){
-    //Wrong, and we don't want to keep checking or we might segfault
-    return 0;
-  }
-
-  //Current implementation maintains ordering, so this assumes the
-  //tester knows the expected output order.
- int success = 1; - for(int i = 0; (i < num_blocks) && success; i++){ - success = success && ss->start_offsets[i] == start_offsets[i]; - success = success && ss->end_offsets[i] == end_offsets[i]; - success = success && ss->start_offsets[i] == start_offsets[i]; - } - - return success; -} - -int test_subset_create( Fenix_Data_subset *sub1, Fenix_Data_subset *sub2, - Fenix_Data_subset *sub3, int num_blocks, int stride, - int *start_offsets, int *end_offsets, int *num_repeats){ - int success = 1; - success = success && test_subset_main(sub3, num_blocks, start_offsets, end_offsets, num_repeats); - success = success && sub3->specifier == __FENIX_SUBSET_CREATE; - success = success && sub3->stride == stride; - - if(!success){ - printf("ERROR!\n"); - printf("sub1: \n"); - print_subset(sub1); - printf("sub2: \n"); - print_subset(sub2); - printf("sub3: \n"); - print_subset(sub3); - } else { - printf("Success\n"); - } - - __fenix_data_subset_free(sub1); - __fenix_data_subset_free(sub2); - __fenix_data_subset_free(sub3); - - return !success; //return failure status -} - -int test_subset_createv( Fenix_Data_subset *sub1, Fenix_Data_subset *sub2, - Fenix_Data_subset *sub3, int num_blocks, - int *start_offsets, int *end_offsets){ - int success = 1; - int* zeros = calloc(num_blocks, sizeof(int)); - success = success && test_subset_main(sub3, num_blocks, start_offsets, end_offsets, zeros); - free(zeros); - success = success && sub3->specifier == __FENIX_SUBSET_CREATEV; - - if(!success){ - printf("ERROR!\n"); - printf("sub1: \n"); - print_subset(sub1); - printf("sub2: \n"); - print_subset(sub2); - printf("sub3: \n"); - print_subset(sub3); - } else { - printf("Success\n"); - } - - __fenix_data_subset_free(sub1); - __fenix_data_subset_free(sub2); - __fenix_data_subset_free(sub3); - - return !success; -} - -int main(int argc, char **argv) { - Fenix_Data_subset sub1; - Fenix_Data_subset sub2; - Fenix_Data_subset sub3; - - int failure = 0; - - printf("Testing equivalent create subsets of same size & location: "); - Fenix_Data_subset_create(3, 2, 5, 5, &sub1); - Fenix_Data_subset_create(3, 2, 5, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){2}, (int[]){5}, (int[]){2}); - - printf("Testing equivalent create subsets, one within another: "); - Fenix_Data_subset_create(1, 17, 20, 5, &sub1); - Fenix_Data_subset_create(3, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, (int[]){15}, (int[]){2}); - - printf("Testing equivalent create subsets in non-overlapping, continuous regions: "); - Fenix_Data_subset_create(1, 22, 25, 5, &sub1); - Fenix_Data_subset_create(2, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, (int[]){15}, (int[]){2}); - - printf("Testing equivalent create subsets in non-overlapping, non-continuous regions: "); - Fenix_Data_subset_create(1, 22, 25, 5, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 2, 5, (int[]){22, 12}, (int[]){25, 15}, (int[]){1,0}); - - printf("Testing create subsets of same location: "); - Fenix_Data_subset_create(1, 13, 15, 5, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, 
(int[]){15}, (int[]){0}); - - printf("Testing distinct create subsets with same stride: "); - Fenix_Data_subset_create(1, 17, 19, 5, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 2, 5, (int[]){17, 12}, (int[]){19, 15}, (int[]){0, 0}); - - printf("Testing distinct, overlapping create subsets with same stride: "); - Fenix_Data_subset_create(1, 17, 19, 5, &sub1); - Fenix_Data_subset_create(2, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_create(&sub1, &sub2, &sub3, 1, 5, (int[]){12}, (int[]){15}, (int[]){1}); - - printf("Testing distinct create subsets with unique stride: "); - Fenix_Data_subset_create(1, 17, 19, 6, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 2, (int[]){17, 12}, (int[]){19, 15}); - - printf("Testing distinct overlapping create subsets with unique stride: "); - Fenix_Data_subset_create(1, 13, 16, 6, &sub1); - Fenix_Data_subset_create(1, 12, 15, 5, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 1, (int[]){12}, (int[]){16}); - - printf("Testing complex createv subsets: "); - Fenix_Data_subset_createv(4, (int[]){1, 4, 21, 23}, (int[]){2, 17, 25, 26}, &sub1); - Fenix_Data_subset_createv(3, (int[]){0, 18, 30}, (int[]){1, 19, 30}, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 4, (int[]){0, 4, 21, 30}, (int[]){2, 19, 26, 30}); - - printf("Testing complex create and createv together: "); - Fenix_Data_subset_create(4, 11, 13, 10, &sub1); - Fenix_Data_subset_createv(3, (int[]){0, 12, 31}, (int[]){1, 20, 31}, &sub2); - __fenix_data_subset_merge(&sub1, &sub2, &sub3); - failure += test_subset_createv(&sub1, &sub2, &sub3, 4, (int[]){11, 31, 41, 0}, (int[]){23, 33, 43, 1}); - - - Fenix_Data_subset_delete(&sub1); - - return failure; -}
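
The removed subset_merging test above exercised merging through the internal C entry point __fenix_data_subset_merge and then inspected raw start_offsets/end_offsets arrays; the new test/subset suite covers the same behaviour through the public C++ DataSubset operators instead. As a rough illustration of that correspondence, below is a minimal sketch that re-expresses the first deleted merge case (two identical strided regions) as a membership check against the new API. The header name fenix_data_subset.hpp is an assumption, since the #include targets are elided throughout this patch, and the constructor and operator semantics are taken from how subset_includes.cpp and the addition test at the top of this patch use DataSubset({start, end}, count, stride), operator+, and includes(), not from a separately confirmed interface.

// Sketch only: replays the old "equivalent subsets of same size & location"
// merge case through the new DataSubset interface used by the tests above.
// The header path is an assumption; the real include targets are elided in this patch.
#include <cstdio>
#include "fenix_data_subset.hpp"   // assumed location of Fenix::DataSubset

int main() {
  using Fenix::DataSubset;

  // Old C version: Fenix_Data_subset_create(3, 2, 5, 5, &sub) twice, then merge.
  // Per the constructors used in subset_includes.cpp, this is 3 blocks of
  // [2, 5] with stride 5.
  DataSubset a({2, 5}, 3, 5);
  DataSubset b({2, 5}, 3, 5);

  // The new tests express merging as operator+.
  DataSubset merged = a + b;

  // Merging a subset with an identical copy must not change membership.
  bool ok = true;
  for (int i = 0; i <= 2 + 3 * 5; i++) {
    if (merged.includes(i) != a.includes(i)) {
      printf("FAILURE: index %d differs after self-merge\n", i);
      ok = false;
    }
  }
  if (ok) printf("Self-merge membership check passed\n");
  return ok ? 0 : 1;
}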