Parallel memcpy in cpp
I am trying to copy a matrix in parallel. Below is the code I am working with. Currently it works as expected with char, but it segfaults when I use short. I assume the bug is that the copy writes outside the memory owned by the vector, but I have not been able to confirm that while debugging.
CMakeLists.txt
cmake_minimum_required(VERSION 3.0)
project(memcpy CXX)
find_package (Threads)
add_executable(memcpy main.cpp)
set_property(TARGET memcpy PROPERTY CXX_STANDARD 17)
target_link_libraries (memcpy ${CMAKE_THREAD_LIBS_INIT})
main.cpp
#include <cassert>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

class Barrier {
public:
    explicit Barrier(std::size_t const count) : m_threshold(count), m_remaining(count), m_generation(0) {}

    void wait() {
        auto local = std::unique_lock<std::mutex>{m_mutex};
        auto current_generation = m_generation;
        m_remaining--;
        if (!m_remaining) {
            m_generation++;
            m_remaining = m_threshold;
            m_condition.notify_all();
        } else {
            m_condition.wait(local, [this, current_generation] { return current_generation != m_generation; });
        }
    }

private:
    std::mutex m_mutex;
    std::condition_variable m_condition;
    std::size_t m_threshold;
    std::size_t m_remaining;
    std::size_t m_generation;
};

template <typename T>
class Matrix {
    using reference = typename std::vector<T>::reference;
    using const_reference = typename std::vector<T>::const_reference;

public:
    Matrix(std::size_t rows, std::size_t cols) : m_rows(rows), m_cols(cols), m_data(m_cols * m_rows) {}
    Matrix(std::size_t rows, std::size_t cols, T const& default_val) : m_rows(rows), m_cols(cols), m_data(m_cols * m_rows, default_val) {}

    constexpr std::size_t get_columns() const { return m_cols; }
    constexpr std::size_t get_rows() const { return m_rows; }
    constexpr std::size_t get_element_count() const {
        assert(m_cols * m_rows == m_data.size());
        return m_cols * m_rows;
    }

    T* data() { return m_data.data(); }
    T const* data() const { return m_data.data(); }

    reference operator()(std::size_t const column_x, std::size_t const row_y) {
        assert(0 <= column_x);
        assert(column_x < get_columns());
        assert(0 <= row_y);
        assert(row_y < get_rows());
        return m_data[row_y * m_cols + column_x];
    }

    const_reference operator()(std::size_t const column_x, std::size_t const row_y) const {
        assert(0 <= column_x);
        assert(column_x < get_columns());
        assert(0 <= row_y);
        assert(row_y < get_rows());
        return m_data[row_y * m_cols + column_x];
    }

private:
    std::size_t const m_rows;
    std::size_t const m_cols;
    std::vector<T> m_data;
};
// static_assert(false, "FIX ME");  // reminder: pick exactly one element type T below
using T = char;
// using T = short;
// using T = int;
// using T = double;
void run(std::size_t const my_rank, std::size_t const num_threads, Barrier& barrier, Matrix<T> const& from_data, Matrix<T>& to_data) {
    auto n = from_data.get_element_count();
    std::string str;
    if (my_rank == 0) {
        std::cerr << "bytes to copy: " << (n * sizeof(T)) << '\n';
    }

    // initialization
    std::size_t segment_size = n / num_threads;
    std::size_t start = (my_rank * segment_size) * sizeof(T);
    std::size_t end = ((my_rank + 1) * segment_size) * sizeof(T);
    std::size_t distance = end - start;

    str += " my_rank: " + std::to_string(my_rank);
    str += " segment_size: " + std::to_string(segment_size);
    str += " start: " + std::to_string(start);
    str += " end: " + std::to_string(end);
    str += " distance: " + std::to_string(distance);
    str += " rank: " + std::to_string(my_rank);
    str += " start: " + std::to_string(start);
    str += " end: " + std::to_string(end);
    str += " distance: " + std::to_string(distance);
    str += " e: " + std::to_string(start + distance);
    str += "\n";
    std::cerr << str;

    barrier.wait();
    std::memcpy(to_data.data() + start, from_data.data() + start, distance);
    barrier.wait();

    if (my_rank == 0)
        for (auto y = 0; y < from_data.get_rows(); y++) {
            for (auto x = 0; x < from_data.get_columns(); x++) {
                if (to_data(x, y) != from_data(x, y)) {
                    std::cerr << "x: " << x << '\t' << "y: " << y << "\t\t";
                    std::cerr << "to: " << to_data(x, y) << '\t' << "from: " << from_data(x, y) << '\n';
                }
            }
        }
    barrier.wait();
}
int main() {
    auto const num_threads = 1;
    // auto const num_threads = 4;
    // auto const width = 64;
    // auto const height = 64;
    auto const width = 97;
    auto const height = 101;

    auto from_data = Matrix<T>(width, height, 70);
    auto to_data = Matrix<T>(width, height, 84);

    std::vector<std::thread> threads;
    auto barrier = Barrier{num_threads};
    for (auto i = 0; i < num_threads; i++) {
        threads.emplace_back(run, i, num_threads, std::ref(barrier), std::ref(from_data), std::ref(to_data));
    }
    for (auto& thread : threads) {
        thread.join();
    }
}
c++ multithreading pointers c++14 memcpy
I doubt you will get a significant performance improvement from threads if your goal is just to duplicate data in memory... maybe if you use a number of threads equal to the number of memory channels, but even so, CPU clocks are already about double the memory's, which makes it four times faster considering you have to read and then write back. The memory bus is the bottleneck here, not the CPU. – Havenard, Nov 15 '18 at 4:14

auto from_data = Matrix<std::string>(width, height, 70); -- Your code is instantly broken. If you had considered things like this, you would never have used memcpy. Never use std::memcpy if there is a chance that the thing you're copying could be non-POD. Compilers these days are smart enough to choose what type of copy to use when you use std::copy instead (either memcpy, a loop, etc.). – PaulMcKenzie, Nov 15 '18 at 4:32

@Havenard The threads already exist at this point in the code and would have nothing to do but wait around for the memcpy to finish. (It's also for a university assignment.) – Brandon, Nov 15 '18 at 4:33
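To make PaulMcKenzie's point concrete, here is a minimal standalone sketch (not taken from the question; the file name and values are made up) showing that std::copy stays correct when the element type is not trivially copyable, whereas a byte-wise std::memcpy over the same objects would be undefined behavior:

// non_pod_copy.cpp -- hypothetical example, not part of the question's code
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> from(8, "hello");  // std::string is not trivially copyable
    std::vector<std::string> to(8);

    // std::copy performs element-wise copy assignment, so it is valid for any
    // copyable T; for trivially copyable T compilers lower it to memmove anyway.
    // A std::memcpy over these std::string objects would be undefined behavior.
    std::copy(from.begin(), from.end(), to.begin());

    assert(to == from);
}

The same std::copy call works unchanged for char, short, int, or double, which is why the answer below recommends it.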
1 Answer
std::memcpy(to_data.data() + start, from_data.data() + start, distance)

std::vector<T>::data() returns a T*, so if you add an integral value foo to it, you effectively add foo * sizeof(T) bytes ... but you already multiplied by sizeof(T) earlier when calculating start and end. Also, std::memcpy() won't work for Ts that are not PODs.

Better use std::copy().
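A minimal sketch of the fix along the answer's advice, assuming the rest of the question's code stays as posted: keep start and end as element indices (pointer arithmetic on a T* already scales by sizeof(T)) and let std::copy do the copying. copy_segment is a hypothetical helper name, not something from the original code:

// Requires #include <algorithm>; Barrier, Matrix<T> and T are the question's types.
void copy_segment(std::size_t const my_rank, std::size_t const num_threads,
                  Barrier& barrier, Matrix<T> const& from_data, Matrix<T>& to_data) {
    auto const n = from_data.get_element_count();
    // Work in elements, not bytes: data() + start already advances by start * sizeof(T) bytes.
    std::size_t const segment_size = n / num_threads;
    std::size_t const start = my_rank * segment_size;
    // Let the last thread take the remainder when n is not divisible by num_threads.
    std::size_t const end = (my_rank + 1 == num_threads) ? n : (my_rank + 1) * segment_size;

    barrier.wait();
    // std::copy is valid for any copyable T and is typically lowered to memmove
    // for trivially copyable types, so nothing is lost compared with memcpy.
    std::copy(from_data.data() + start, from_data.data() + end, to_data.data() + start);
    barrier.wait();
}

With T = short, the original to_data.data() + start advances start elements rather than start bytes, i.e. twice as far as intended, so every thread except rank 0 reads and writes past the end of the vectors, which matches the reported segfault.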
Also, std::copy is written to be smart enough to fall back to std::memcpy or equivalent if the type is detected as trivially copyable, so you're not losing anything by using std::copy. – PaulMcKenzie, Nov 15 '18 at 4:40