Last active
March 28, 2017 07:54
-
-
Save lambday/25224ef9b54ecdbb2405b0e12efae90c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class CDistance | |
| { | |
| SGMatrix<float64_t> CDistance::get_distance_matrix() | |
| { | |
| SGMatrix<float64_t> distance_matrix(lhs.size(), rhs.size()); | |
| for (const auto& sample : rhs) | |
| { | |
| lhs.parallel_stream() | |
| .map([&sample, this](CFeatureSample const * other_sample) | |
| { | |
| return this->distance(sample, other_sample); | |
| }) | |
| .collect(Collectors::to_vector(distance_matrix.col(sample.index()))); | |
| } | |
| } | |
| }; | |
| class CEuclideanDistance | |
| { | |
| void init(CDotFeatures const * lhs, CDotFeatures const * rhs); | |
| void cache_squared_norms() | |
| { | |
| auto norm_functor = [](CDotFeatureSample const * sample) | |
| { | |
| return sample->dot(sample); | |
| }; | |
| norm_cache_lhs = lhs.parallel_strem().map(norm_functor).collect(Collectors::to_vector<float64_t>()); | |
| norm_cache_rhs = rhs.parallel_strem().map(norm_functor).collect(Collectors::to_vector<float64_t>()); | |
| } | |
| virtual float64_t distance(CFeatureSample const * first, CFeaturesSample const * second) override | |
| { | |
| auto first_dot_feature = static_cast<CDotFeatureSample const *>(first); | |
| auto second_dot_feature = static_cast<CDotFeatureSample const *>(second); | |
| auto result = first_dot_feature->dot(second_dot_feature); | |
| return CMath::sqrt(norm_cache_lhs[first->index()] + norm_cache_rhs[second->index()] - 2 * result); | |
| } | |
| }; | |
| class CKernel | |
| { | |
| SGMatrix<float64_t> CKernel::get_kernel_matrix() | |
| { | |
| SGMatrix<float64_t> kernel_matrix(lhs.size(), rhs.size()); | |
| for (const auto& sample : rhs) | |
| { | |
| lhs.parallel_stream() | |
| .map([&sample, this](CFeatureSample const * other_sample) | |
| { | |
| return this->kernel(sample, other_sample); | |
| }) | |
| .collect(Collectors::to_vector(kernel_matrix.col(sample.index()))); | |
| } | |
| } | |
| }; | |
| class CBTestMMD | |
| { | |
| // punching multiple jobs together for performance reasons | |
| // can use it for computing both these value simultaneously | |
| template <class Block> | |
| auto compute_jobs(const Block& block) | |
| { | |
| auto mmd = statistic_job(block); | |
| auto var = variance_job(block); | |
| return std::make_pair(statistic, variance); | |
| } | |
| float64_t compute_statistic() | |
| { | |
| auto result = 0.0; | |
| if (kernel_matrix_precomputed) | |
| { | |
| result = kernel_matrix.diagonal_block_stream(blocksize) | |
| .map(statistic_job) | |
| .mean(); | |
| } | |
| else | |
| { | |
| result = data_mgr.block_stream(blocksize) | |
| .map([this](const NextSamples& next_samples) | |
| { | |
| auto samples_p = next_samples.sample_at(0); | |
| auto samples_q = next_samples.sample_at(1); | |
| return kernel->get_kernel_matrix(samples_p, samples_q); | |
| }) | |
| .map(statistic_job) | |
| .mean(); | |
| } | |
| return normalize_statistic(result); | |
| } | |
| /** | |
| * Example of packing multiple computation job per element in the stream and reducing to multiple values | |
| */ | |
| std::pair<float64_t, float64_t> compute_statistic_variance() | |
| { | |
| auto result = make_pair(0.0, 0.0); | |
| auto unitary = std::make_pair(0.0, size_t(1)); | |
| // the following can also be off-the-shelf, where a supplied lambda simply helps it extract | |
| // the element (which, in this case, can simply be std::get<0, float64_t, float64_t> | |
| // signature : unitary accumulator(decltype(unitary), AnyValue) | |
| auto accumulator = [](const std::pair<float64_t, size_t>& value, const decltype(result)& pair) | |
| { | |
| auto& running_mean = value.first; | |
| const auto& num_terms_including_current = ++value.second; | |
| auto delta = pair.first - running_mean; | |
| running_mean = running_mean + delta/num_terms_including_current; | |
| return value; | |
| }; | |
| auto finalizer = std::get<0, float64_t, size_t>; // final return value | |
| // should have the sinature auto finalizer(decltype(unitary)) | |
| // optional combiner in case of parallel streams | |
| auto statistic_reducer = Reducers::custom(unitary, accumulator, finalizer); | |
| // similarly, variance reducer | |
| auto variance_reducer = Reducers::custom(...); | |
| // if saving the values are necessary | |
| // auto mmds = SGVector<float64_t>(num_blocks); | |
| // auto vars = SGVector<float64_t>(num_blocks); | |
| // auto statistic_collector = ... | |
| if (kernel_matrix_precomputed) | |
| { | |
| result = kernel_matrix.diagonal_block_stream(blocksize) | |
| .map(compute_jobs) | |
| .reduce(statistic_reducer, variance_reducer); | |
| // the return type is a scalar if number of reducers is 1, std::pair if number of reducers is 2, tuple if more | |
| } | |
| else | |
| { | |
| result = data_mgr.block_stream(blocksize) | |
| .map([this](const NextSamples& next_samples) | |
| { | |
| auto samples_p = next_samples.sample_at(0); | |
| auto samples_q = next_samples.sample_at(1); | |
| return kernel->get_kernel_matrix(samples_p, samples_q); | |
| }) | |
| .map(compute_jobs) | |
| .reduce(statistic_reducer, variance_reducer); | |
| } | |
| return make_pair(normalize_statistic(result.first), normalize_variance(result.second)); | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>
namespace std
{
/**
 * Scalar fallback for std::get: treats a non-tuple object as a one-element
 * sequence and returns the object itself, so generic code can write
 * std::get<0>(x) for scalars as well as for pairs and tuples.
 *
 * NOTE(review): the C++ standard makes it undefined behaviour to declare new
 * function templates in namespace std. The overload is kept because callers
 * in this file spell the call as std::get<0>(scalar); moving it into a
 * project namespace would be the conforming fix.
 *
 * @tparam I element index; must be 0
 * @param t object to return
 * @return reference to t
 */
template<size_t I, class T>
T& get(T& t)
{
    // Previously any index was accepted and silently ignored.
    static_assert(I == 0, "a scalar has exactly one element; only index 0 is valid");
    return t;
}
}
/**
 * Building blocks for folding a collection into one or several values in a
 * single pass. A Reducer pairs a start value ("unitary") with an accumulate
 * callable; when both are a std::pair / std::tuple of equal arity, every
 * element of the collection is fed to each accumulator in turn.
 */
namespace Reducers
{
/** Number of reducers packed into T: 1 for anything that is not a pair/tuple. */
template <class T>
struct num_elements
{
    enum { value = 1 };
};
template <class A, class B>
struct num_elements<std::pair<A,B>>
{
    enum { value = 2 };
};
template <class... A>
struct num_elements<std::tuple<A...>>
{
    enum { value = std::tuple_size<std::tuple<A...>>::value };
};

/**
 * Applies accumulator number CurrentNumReducer-1 of R to `value`.
 * Each instantiation derives from the one for the previous index, and base
 * constructors run first, so the accumulators are applied in order 0..N-1.
 */
template <class R, class V, size_t TotalNumReducers, size_t CurrentNumReducer>
struct accumulate_all : accumulate_all<R,V,TotalNumReducers,CurrentNumReducer-1>
{
    typedef typename R::unitary_type U;
    explicit accumulate_all(R& r, V& value, U& accumulated)
        : accumulate_all<R,V,TotalNumReducers,CurrentNumReducer-1>(r, value, accumulated)
    {
        std::get<CurrentNumReducer-1>(accumulated) = std::get<CurrentNumReducer-1>(r.accumulate)(std::get<CurrentNumReducer-1>(accumulated), value);
    }
};
/** End of the inheritance chain for packed reducers: handles index 0. */
template <class R, class V, size_t TotalNumReducers>
struct accumulate_all<R,V,TotalNumReducers,1>
{
    typedef typename R::unitary_type U;
    explicit accumulate_all(R& r, V& value, U& accumulated)
    {
        std::get<0>(accumulated) = std::get<0>(r.accumulate)(std::get<0>(accumulated), value);
    }
};
/** Single reducer: state and accumulator are used directly, not via std::get. */
template <class R, class V>
struct accumulate_all<R,V,1,1>
{
    typedef typename R::unitary_type U;
    explicit accumulate_all(R& r, V& value, U& accumulated)
    {
        accumulated = r.accumulate(accumulated, value);
    }
};
/** Entry point: constructing one folds `value` into `accumulated` with every reducer of R. */
template <class R, class V>
struct accumulator : accumulate_all<R,V,R::num_reducers,R::num_reducers>
{
    typedef typename R::unitary_type U;
    explicit accumulator(R& r, V& value, U& accumulated)
        : accumulate_all<R,V,R::num_reducers,R::num_reducers>(r, value, accumulated)
    {
    }
};

/**
 * A start value plus the callable(s) that fold elements into it.
 *
 * @tparam U type of the reduction state (scalar, pair or tuple)
 * @tparam A type of the accumulate callable (or pair/tuple of callables)
 * @tparam N number of packed reducers
 */
template <class U, class A, size_t N>
struct Reducer
{
    using unitary_type = U;
    using accumulate_type = A;
    static constexpr size_t num_reducers = N;
    using type = Reducer<U,A,N>;

    Reducer(const U& _unitary, A&& _accumulate)
        : unitary(_unitary), accumulate(std::forward<A>(_accumulate))
    {
    }

    /**
     * Folds [current, end) into `accumulated` and returns the final state.
     * The iterator type must expose `value_type`, `operator==`, pre-increment
     * and a dereference that yields a non-const lvalue of `value_type`.
     */
    template <class It>
    U reduce(It current, It end, U accumulated)
    {
        typedef typename It::value_type V;
        // Iterative on purpose: the original recursed once per element and
        // copied the state on every call, so large collections could exhaust
        // the stack.
        for (; !(current == end); ++current)
            accumulator<type,V>(*this, *current, accumulated);
        return accumulated;
    }

    /** Folds the whole collection `*c`, starting from `unitary`. */
    template <class C>
    U reduce(C const * const c)
    {
        return reduce(c->begin(), c->end(), unitary);
    }

    U unitary;
    A accumulate;
};

/**
 * Creates a Reducer from a start value and an accumulate callable (or from a
 * pair/tuple of each).
 *
 * Argument types are decayed so the Reducer always owns copies. Without that,
 * lvalue arguments produced reference members: reduce() then wrote into the
 * caller's start value, and a pair passed as an lvalue was counted as a single
 * reducer. Calls that pass temporaries get exactly the same type as before.
 */
template <class U, class A>
auto custom(U&& unitary, A&& accumulate)
    -> Reducer<std::decay_t<U>, std::decay_t<A>, num_elements<std::decay_t<A>>::value>
{
    using unitary_t = std::decay_t<U>;
    using accumulate_t = std::decay_t<A>;
    return Reducer<unitary_t, accumulate_t, num_elements<accumulate_t>::value>(
        std::forward<U>(unitary), accumulate_t(std::forward<A>(accumulate)));
}
}
using Reducers::Reducer;
using Reducers::custom;
/**
 * Non-owning view over a collection that offers reductions.
 *
 * @tparam C collection type; must provide `value_type`, `begin()` and `end()`.
 * The pointed-to collection must outlive the stream.
 */
template <class C>
struct Stream
{
    Stream(C const * const _c) : c(_c) {}

    /** Runs a single reducer over the collection and returns its final state. */
    template <class R>
    auto reduce(R& r) -> typename R::unitary_type
    {
        return r.reduce(c);
    }

    /** Runs two reducers in one pass; the results come back as a std::pair. */
    template <class R1, class R2>
    auto reduce(R1& r1, R2& r2) -> std::pair<typename R1::unitary_type,typename R2::unitary_type>
    {
        return custom(std::make_pair(r1.unitary, r2.unitary), std::make_pair(r1.accumulate, r2.accumulate)).reduce(c);
    }

    /** Runs any number of reducers in one pass; the results come back as a std::tuple. */
    template <class...Rs>
    auto reduce(Rs...rs) -> std::tuple<typename Rs::unitary_type...>
    {
        return custom(std::make_tuple(rs.unitary...), std::make_tuple(rs.accumulate...)).reduce(c);
    }

    /** Sum of all elements, accumulated in the collection's value_type. */
    auto sum() -> typename C::value_type
    {
        typedef typename C::value_type T;
        // Seed with T, not the int literal 0: the accumulator takes its state
        // as T&, so an int seed only compiled when T was int.
        return custom(T(0), [](T& r, T& v) { return r+v; }).reduce(c);
    }

    /** Product of all elements, accumulated in the collection's value_type. */
    auto prod() -> typename C::value_type
    {
        typedef typename C::value_type T;
        // Seed with T for the same reason as in sum().
        return custom(T(1), [](T& r, T& v) { return r*v; }).reduce(c);
    }

    /** Arithmetic mean of all elements, computed incrementally as a double. */
    double double_mean()
    {
        typedef typename C::value_type T;
        // State is (running mean, 1-based index of the next element).
        auto result = custom(std::make_pair(0.0, size_t(1)), [](std::pair<double,size_t> r, T& v)
        {
            auto delta = v - r.first;
            r.first += delta/r.second++;
            return r;
        }).reduce(c);
        return result.first;
    }

    C const * const c;
};
| template <class T> | |
| struct Collection | |
| { | |
| template <class V> | |
| struct iterator : std::iterator<std::bidirectional_iterator_tag,V> | |
| { | |
| using value_type = V; | |
| explicit iterator(T* _ptr) : ptr(_ptr) {} | |
| iterator& operator++() { ptr++; return *this; } | |
| iterator operator++(int) { auto ret = *this; ++(*this); return ret; } | |
| iterator& operator--() { ptr--; return *this; } | |
| iterator operator--(int) { auto ret = *this; --(*this); return ret; } | |
| friend bool operator==(iterator& first, iterator& second) | |
| { | |
| return first.ptr == second.ptr; | |
| } | |
| friend bool operator!=(iterator& first, iterator& second) | |
| { | |
| return !(first == second); | |
| } | |
| V& operator*() { return *ptr; } | |
| V* ptr; | |
| }; | |
| using value_type = T; | |
| using iterator_type = iterator<T>; | |
| Stream<Collection<T>> stream() const | |
| { | |
| return Stream<Collection<T>>(this); | |
| } | |
| virtual iterator<T> begin() const = 0; | |
| virtual iterator<T> end() const = 0; | |
| }; | |
| template <class T> | |
| struct Vector : public Collection<T> | |
| { | |
| using iterator_type = typename Collection<T>::iterator_type; | |
| Vector(std::initializer_list<T> list) | |
| : vec(std::make_unique<T[]>(list.size())), vlen(list.size()) | |
| { | |
| std::copy(list.begin(), list.end(), vec.get()); | |
| } | |
| virtual iterator_type begin() const override | |
| { | |
| return iterator_type(vec.get()); | |
| } | |
| virtual iterator_type end() const override | |
| { | |
| return iterator_type(vec.get() + vlen); | |
| } | |
| std::unique_ptr<T[]> vec; | |
| size_t vlen; | |
| }; | |
| int main() | |
| { | |
| Vector<int> v({1,2,3,4,5,6,7,8,9,10}); | |
| // basic reduction | |
| std::cout << "off-the-shelf ----------" << std::endl; | |
| std::cout << "sum = " << v.stream().sum() << std::endl; | |
| std::cout << "prod = " << v.stream().prod() << std::endl; | |
| std::cout << "mean = " << v.stream().double_mean() << std::endl; | |
| // custom and complex reductions | |
| auto r1 = custom(0, [](int r, int v) { return r+v; }); | |
| auto r2 = custom(1, [](int r, int v) { return r*v; }); | |
| auto r3 = custom(std::make_pair(0.0,1ul), [](std::pair<double,size_t> r, int v) | |
| { | |
| auto delta = v - r.first; | |
| r.first += delta/r.second++; | |
| return r; | |
| }); | |
| auto result = v.stream().reduce(r1); | |
| auto result2 = v.stream().reduce(r1, r2); | |
| auto result3 = v.stream().reduce(r1, r2, r3); | |
| std::cout << "custom reducers --------" << std::endl; | |
| std::cout << "sum = " << std::get<0>(result) << std::endl; | |
| std::cout << "sum, prd = " << std::get<0>(result2) << ", " << std::get<1>(result2) << std::endl; | |
| std::cout << "sum, prod, mean = " << std::get<0>(result3) << ", " << std::get<1>(result3) << ", " << std::get<2>(result3).first << std::endl; | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| off-the-shelf ---------- | |
| sum = 55 | |
| prod = 3628800 | |
| mean = 5.5 | |
| custom reducers -------- | |
| sum = 55 | |
| sum, prd = 55, 3628800 | |
| sum, prod, mean = 55, 3628800, 5.5 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment