X-Git-Url: http://wagnertech.de/git?a=blobdiff_plain;f=i686-linux-gnu-4.7%2Fusr%2Finclude%2Fc%2B%2B%2F4.7%2Fparallel%2Fmultiway_mergesort.h;fp=i686-linux-gnu-4.7%2Fusr%2Finclude%2Fc%2B%2B%2F4.7%2Fparallel%2Fmultiway_mergesort.h;h=0a9e78c1a90e174c79456611acec842657bef189;hb=94df942c2c7bd3457276fe5b7367623cbb8c1302;hp=0000000000000000000000000000000000000000;hpb=4dd7d9155a920895ff7b1cb6b9c9c676aa62000a;p=cross.git diff --git a/i686-linux-gnu-4.7/usr/include/c++/4.7/parallel/multiway_mergesort.h b/i686-linux-gnu-4.7/usr/include/c++/4.7/parallel/multiway_mergesort.h new file mode 100644 index 0000000..0a9e78c --- /dev/null +++ b/i686-linux-gnu-4.7/usr/include/c++/4.7/parallel/multiway_mergesort.h @@ -0,0 +1,480 @@ +// -*- C++ -*- + +// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License as published by the Free Software +// Foundation; either version 3, or (at your option) any later +// version. + +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// . + +/** @file parallel/multiway_mergesort.h + * @brief Parallel multiway merge sort. + * This file is a GNU parallel extension to the Standard C++ Library. + */ + +// Written by Johannes Singler. + +#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H +#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1 + +#include + +#include +#include +#include +#include + +namespace __gnu_parallel +{ + /** @brief Subsequence description. */ + template + struct _Piece + { + typedef _DifferenceTp _DifferenceType; + + /** @brief Begin of subsequence. */ + _DifferenceType _M_begin; + + /** @brief End of subsequence. */ + _DifferenceType _M_end; + }; + + /** @brief Data accessed by all threads. + * + * PMWMS = parallel multiway mergesort */ + template + struct _PMWMSSortingData + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + /** @brief Number of threads involved. */ + _ThreadIndex _M_num_threads; + + /** @brief Input __begin. */ + _RAIter _M_source; + + /** @brief Start indices, per thread. */ + _DifferenceType* _M_starts; + + /** @brief Storage in which to sort. */ + _ValueType** _M_temporary; + + /** @brief Samples. */ + _ValueType* _M_samples; + + /** @brief Offsets to add to the found positions. */ + _DifferenceType* _M_offsets; + + /** @brief Pieces of data to merge @c [thread][__sequence] */ + std::vector<_Piece<_DifferenceType> >* _M_pieces; + }; + + /** + * @brief Select _M_samples from a sequence. + * @param __sd Pointer to algorithm data. _Result will be placed in + * @c __sd->_M_samples. + * @param __num_samples Number of _M_samples to select. + */ + template + void + __determine_samples(_PMWMSSortingData<_RAIter>* __sd, + _DifferenceTp __num_samples) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef _DifferenceTp _DifferenceType; + + _ThreadIndex __iam = omp_get_thread_num(); + + _DifferenceType* __es = new _DifferenceType[__num_samples + 2]; + + __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], + __num_samples + 1, __es); + + for (_DifferenceType __i = 0; __i < __num_samples; ++__i) + ::new(&(__sd->_M_samples[__iam * __num_samples + __i])) + _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + + __es[__i + 1]]); + + delete[] __es; + } + + /** @brief Split consistently. */ + template + struct _SplitConsistently + { }; + + /** @brief Split by exact splitting. */ + template + struct _SplitConsistently + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { +# pragma omp barrier + + std::vector > + __seqs(__sd->_M_num_threads); + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + __seqs[__s] = std::make_pair(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s])); + + std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads); + + // if not last thread + if (__iam < __sd->_M_num_threads - 1) + multiseq_partition(__seqs.begin(), __seqs.end(), + __sd->_M_starts[__iam + 1], __offsets.begin(), + __comp); + + for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // for each sequence + if (__iam < (__sd->_M_num_threads - 1)) + __sd->_M_pieces[__iam][__seq]._M_end + = __offsets[__seq] - __seqs[__seq].first; + else + // very end of this sequence + __sd->_M_pieces[__iam][__seq]._M_end = + __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; + } + +# pragma omp barrier + + for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++) + { + // For each sequence. + if (__iam > 0) + __sd->_M_pieces[__iam][__seq]._M_begin = + __sd->_M_pieces[__iam - 1][__seq]._M_end; + else + // Absolute beginning. + __sd->_M_pieces[__iam][__seq]._M_begin = 0; + } + } + }; + + /** @brief Split by sampling. */ + template + struct _SplitConsistently + { + void + operator()(const _ThreadIndex __iam, + _PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp, + const typename + std::iterator_traits<_RAIter>::difference_type + __num_samples) const + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + __determine_samples(__sd, __num_samples); + +# pragma omp barrier + +# pragma omp single + __gnu_sequential::sort(__sd->_M_samples, + __sd->_M_samples + + (__num_samples * __sd->_M_num_threads), + __comp); + +# pragma omp barrier + + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) + { + // For each sequence. + if (__num_samples * __iam > 0) + __sd->_M_pieces[__iam][__s]._M_begin = + std::lower_bound(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * __iam], + __comp) + - __sd->_M_temporary[__s]; + else + // Absolute beginning. + __sd->_M_pieces[__iam][__s]._M_begin = 0; + + if ((__num_samples * (__iam + 1)) < + (__num_samples * __sd->_M_num_threads)) + __sd->_M_pieces[__iam][__s]._M_end = + std::lower_bound(__sd->_M_temporary[__s], + __sd->_M_temporary[__s] + + (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]), + __sd->_M_samples[__num_samples * (__iam + 1)], + __comp) + - __sd->_M_temporary[__s]; + else + // Absolute end. + __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1] + - __sd->_M_starts[__s]); + } + } + }; + + template + struct __possibly_stable_sort + { }; + + template + struct __possibly_stable_sort + { + void operator()(const _RAIter& __begin, + const _RAIter& __end, _Compare& __comp) const + { __gnu_sequential::stable_sort(__begin, __end, __comp); } + }; + + template + struct __possibly_stable_sort + { + void operator()(const _RAIter __begin, + const _RAIter __end, _Compare& __comp) const + { __gnu_sequential::sort(__begin, __end, __comp); } + }; + + template + struct __possibly_stable_multiway_merge + { }; + + template + struct __possibly_stable_multiway_merge + { + void operator()(const Seq_RAIter& __seqs_begin, + const Seq_RAIter& __seqs_end, + const _RAIter& __target, + _Compare& __comp, + _DiffType __length_am) const + { stable_multiway_merge(__seqs_begin, __seqs_end, __target, + __length_am, __comp, sequential_tag()); } + }; + + template + struct __possibly_stable_multiway_merge + { + void operator()(const Seq_RAIter& __seqs_begin, + const Seq_RAIter& __seqs_end, + const _RAIter& __target, + _Compare& __comp, + _DiffType __length_am) const + { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, + __comp, sequential_tag()); } + }; + + /** @brief PMWMS code executed by each thread. + * @param __sd Pointer to algorithm data. + * @param __comp Comparator. + */ + template + void + parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd, + _Compare& __comp) + { + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _ThreadIndex __iam = omp_get_thread_num(); + + // Length of this thread's chunk, before merging. + _DifferenceType __length_local = + __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; + + // Sort in temporary storage, leave space for sentinel. + + typedef _ValueType* _SortingPlacesIterator; + + __sd->_M_temporary[__iam] = + static_cast<_ValueType*>(::operator new(sizeof(_ValueType) + * (__length_local + 1))); + + // Copy there. + std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], + __sd->_M_source + __sd->_M_starts[__iam] + + __length_local, + __sd->_M_temporary[__iam]); + + __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() + (__sd->_M_temporary[__iam], + __sd->_M_temporary[__iam] + __length_local, + __comp); + + // Invariant: locally sorted subsequence in sd->_M_temporary[__iam], + // __sd->_M_temporary[__iam] + __length_local. + + // No barrier here: Synchronization is done by the splitting routine. + + _DifferenceType __num_samples = + _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; + _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>() + (__iam, __sd, __comp, __num_samples); + + // Offset from __target __begin, __length after merging. + _DifferenceType __offset = 0, __length_am = 0; + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) + { + __length_am += (__sd->_M_pieces[__iam][__s]._M_end + - __sd->_M_pieces[__iam][__s]._M_begin); + __offset += __sd->_M_pieces[__iam][__s]._M_begin; + } + + typedef std::vector< + std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > + _SeqVector; + _SeqVector __seqs(__sd->_M_num_threads); + + for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s) + { + __seqs[__s] = + std::make_pair(__sd->_M_temporary[__s] + + __sd->_M_pieces[__iam][__s]._M_begin, + __sd->_M_temporary[__s] + + __sd->_M_pieces[__iam][__s]._M_end); + } + + __possibly_stable_multiway_merge< + __stable, typename _SeqVector::iterator, + _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(), + __sd->_M_source + __offset, __comp, + __length_am); + +# pragma omp barrier + + for (_DifferenceType __i = 0; __i < __length_local; ++__i) + __sd->_M_temporary[__iam][__i].~_ValueType(); + ::operator delete(__sd->_M_temporary[__iam]); + } + + /** @brief PMWMS main call. + * @param __begin Begin iterator of sequence. + * @param __end End iterator of sequence. + * @param __comp Comparator. + * @param __num_threads Number of threads to use. + */ + template + void + parallel_sort_mwms(_RAIter __begin, _RAIter __end, + _Compare __comp, + _ThreadIndex __num_threads) + { + _GLIBCXX_CALL(__end - __begin) + + typedef std::iterator_traits<_RAIter> _TraitsType; + typedef typename _TraitsType::value_type _ValueType; + typedef typename _TraitsType::difference_type _DifferenceType; + + _DifferenceType __n = __end - __begin; + + if (__n <= 1) + return; + + // at least one element per thread + if (__num_threads > __n) + __num_threads = static_cast<_ThreadIndex>(__n); + + // shared variables + _PMWMSSortingData<_RAIter> __sd; + _DifferenceType* __starts; + _DifferenceType __size; + +# pragma omp parallel num_threads(__num_threads) + { + __num_threads = omp_get_num_threads(); //no more threads than requested + +# pragma omp single + { + __sd._M_num_threads = __num_threads; + __sd._M_source = __begin; + + __sd._M_temporary = new _ValueType*[__num_threads]; + + if (!__exact) + { + __size = + (_Settings::get().sort_mwms_oversampling * __num_threads - 1) + * __num_threads; + __sd._M_samples = static_cast<_ValueType*> + (::operator new(__size * sizeof(_ValueType))); + } + else + __sd._M_samples = 0; + + __sd._M_offsets = new _DifferenceType[__num_threads - 1]; + __sd._M_pieces + = new std::vector<_Piece<_DifferenceType> >[__num_threads]; + for (_ThreadIndex __s = 0; __s < __num_threads; ++__s) + __sd._M_pieces[__s].resize(__num_threads); + __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; + + _DifferenceType __chunk_length = __n / __num_threads; + _DifferenceType __split = __n % __num_threads; + _DifferenceType __pos = 0; + for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) + { + __starts[__i] = __pos; + __pos += ((__i < __split) + ? (__chunk_length + 1) : __chunk_length); + } + __starts[__num_threads] = __pos; + } //single + + // Now sort in parallel. + parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); + } //parallel + + delete[] __starts; + delete[] __sd._M_temporary; + + if (!__exact) + { + for (_DifferenceType __i = 0; __i < __size; ++__i) + __sd._M_samples[__i].~_ValueType(); + ::operator delete(__sd._M_samples); + } + + delete[] __sd._M_offsets; + delete[] __sd._M_pieces; + } + +} //namespace __gnu_parallel + +#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */