Skip to content

Instantly share code, notes, and snippets.

@lambday
Last active March 28, 2017 07:54
Show Gist options
  • Select an option

  • Save lambday/25224ef9b54ecdbb2405b0e12efae90c to your computer and use it in GitHub Desktop.

Select an option

Save lambday/25224ef9b54ecdbb2405b0e12efae90c to your computer and use it in GitHub Desktop.
/**
 * Design sketch of a distance class that fills a full distance matrix one
 * column at a time, streaming over the left-hand-side samples in parallel.
 *
 * NOTE(review): lhs, rhs, distance(), SGMatrix, Collectors and
 * CFeatureSample are not declared in this snippet and are assumed to be
 * provided by the surrounding project.
 */
class CDistance
{
    /**
     * Evaluates distance() for every (lhs sample, rhs sample) combination.
     *
     * @return matrix constructed with lhs.size() rows and rhs.size() columns;
     *         for each rhs sample, the column selected by sample.index()
     *         receives the values computed against all lhs samples
     */
    SGMatrix<float64_t> get_distance_matrix()
    {
        SGMatrix<float64_t> distance_matrix(lhs.size(), rhs.size());
        for (const auto& sample : rhs)
        {
            // NOTE(review): sample is used both as an object (sample.index())
            // and as an argument to distance(), which is declared with pointer
            // parameters in CEuclideanDistance - confirm the element type of rhs.
            lhs.parallel_stream()
                .map([&sample, this](CFeatureSample const * other_sample)
                {
                    // other_sample is drawn from lhs and sample from rhs.
                    // CEuclideanDistance::distance looks up its lhs norm cache
                    // with the first argument, so the lhs sample goes first.
                    return this->distance(other_sample, sample);
                })
                .collect(Collectors::to_vector(distance_matrix.col(sample.index())));
        }
        // The function is declared to return the matrix; falling off the end
        // of a non-void function is undefined behaviour.
        return distance_matrix;
    }
};
/**
 * Design sketch of a Euclidean distance that caches the squared norm of every
 * sample and derives the distance from the identity
 * ||a - b||^2 = <a,a> + <b,b> - 2 <a,b>.
 *
 * NOTE(review): lhs, rhs, norm_cache_lhs, norm_cache_rhs, Collectors, CMath
 * and the feature types are not declared in this snippet. distance() is marked
 * override, which requires a base class declaring a matching virtual function;
 * no base class is named here - confirm the intended hierarchy.
 */
class CEuclideanDistance
{
    void init(CDotFeatures const * lhs, CDotFeatures const * rhs);

    /**
     * Fills norm_cache_lhs / norm_cache_rhs with sample->dot(sample) for each
     * sample of lhs / rhs respectively.
     */
    void cache_squared_norms()
    {
        auto norm_functor = [](CDotFeatureSample const * sample)
        {
            return sample->dot(sample);
        };
        norm_cache_lhs = lhs.parallel_stream().map(norm_functor).collect(Collectors::to_vector<float64_t>());
        norm_cache_rhs = rhs.parallel_stream().map(norm_functor).collect(Collectors::to_vector<float64_t>());
    }

    /**
     * @param first  sample whose index() selects the entry of norm_cache_lhs
     * @param second sample whose index() selects the entry of norm_cache_rhs
     * @return square root of (cached norm of first + cached norm of second
     *         - 2 * dot product of the two samples)
     */
    virtual float64_t distance(CFeatureSample const * first, CFeatureSample const * second) override
    {
        // Both samples are treated as dot-feature samples without a runtime check.
        auto first_dot_feature = static_cast<CDotFeatureSample const *>(first);
        auto second_dot_feature = static_cast<CDotFeatureSample const *>(second);
        auto result = first_dot_feature->dot(second_dot_feature);
        return CMath::sqrt(norm_cache_lhs[first->index()] + norm_cache_rhs[second->index()] - 2 * result);
    }
};
/**
 * Design sketch of a kernel class that fills a full kernel matrix one column
 * at a time, streaming over the left-hand-side samples in parallel.
 *
 * NOTE(review): lhs, rhs, kernel(), SGMatrix, Collectors and CFeatureSample
 * are not declared in this snippet and are assumed to be provided by the
 * surrounding project.
 */
class CKernel
{
    /**
     * Evaluates kernel() for every (rhs sample, lhs sample) combination.
     *
     * @return matrix constructed with lhs.size() rows and rhs.size() columns;
     *         for each rhs sample, the column selected by sample.index()
     *         receives the values computed against all lhs samples
     */
    SGMatrix<float64_t> get_kernel_matrix()
    {
        SGMatrix<float64_t> kernel_matrix(lhs.size(), rhs.size());
        for (const auto& sample : rhs)
        {
            lhs.parallel_stream()
                .map([&sample, this](CFeatureSample const * other_sample)
                {
                    // NOTE(review): the rhs sample is passed first and the lhs
                    // sample second; kernel() is not visible here, so confirm
                    // this is the order it expects.
                    return this->kernel(sample, other_sample);
                })
                .collect(Collectors::to_vector(kernel_matrix.col(sample.index())));
        }
        // The function is declared to return the matrix; falling off the end
        // of a non-void function is undefined behaviour.
        return kernel_matrix;
    }
};
class CBTestMMD
{
// punching multiple jobs together for performance reasons
// can use it for computing both these value simultaneously
template <class Block>
auto compute_jobs(const Block& block)
{
auto mmd = statistic_job(block);
auto var = variance_job(block);
return std::make_pair(statistic, variance);
}
float64_t compute_statistic()
{
auto result = 0.0;
if (kernel_matrix_precomputed)
{
result = kernel_matrix.diagonal_block_stream(blocksize)
.map(statistic_job)
.mean();
}
else
{
result = data_mgr.block_stream(blocksize)
.map([this](const NextSamples& next_samples)
{
auto samples_p = next_samples.sample_at(0);
auto samples_q = next_samples.sample_at(1);
return kernel->get_kernel_matrix(samples_p, samples_q);
})
.map(statistic_job)
.mean();
}
return normalize_statistic(result);
}
/**
* Example of packing multiple computation job per element in the stream and reducing to multiple values
*/
std::pair<float64_t, float64_t> compute_statistic_variance()
{
auto result = make_pair(0.0, 0.0);
auto unitary = std::make_pair(0.0, size_t(1));
// the following can also be off-the-shelf, where a supplied lambda simply helps it extract
// the element (which, in this case, can simply be std::get<0, float64_t, float64_t>
// signature : unitary accumulator(decltype(unitary), AnyValue)
auto accumulator = [](const std::pair<float64_t, size_t>& value, const decltype(result)& pair)
{
auto& running_mean = value.first;
const auto& num_terms_including_current = ++value.second;
auto delta = pair.first - running_mean;
running_mean = running_mean + delta/num_terms_including_current;
return value;
};
auto finalizer = std::get<0, float64_t, size_t>; // final return value
// should have the sinature auto finalizer(decltype(unitary))
// optional combiner in case of parallel streams
auto statistic_reducer = Reducers::custom(unitary, accumulator, finalizer);
// similarly, variance reducer
auto variance_reducer = Reducers::custom(...);
// if saving the values are necessary
// auto mmds = SGVector<float64_t>(num_blocks);
// auto vars = SGVector<float64_t>(num_blocks);
// auto statistic_collector = ...
if (kernel_matrix_precomputed)
{
result = kernel_matrix.diagonal_block_stream(blocksize)
.map(compute_jobs)
.reduce(statistic_reducer, variance_reducer);
// the return type is a scalar if number of reducers is 1, std::pair if number of reducers is 2, tuple if more
}
else
{
result = data_mgr.block_stream(blocksize)
.map([this](const NextSamples& next_samples)
{
auto samples_p = next_samples.sample_at(0);
auto samples_q = next_samples.sample_at(1);
return kernel->get_kernel_matrix(samples_p, samples_q);
})
.map(compute_jobs)
.reduce(statistic_reducer, variance_reducer);
}
return make_pair(normalize_statistic(result.first), normalize_variance(result.second));
}
}
#include <algorithm>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>
namespace std
{
/**
 * Identity overload of get: for any object it returns a reference to that
 * same object, whatever index I is requested.
 *
 * NOTE(review): the C++ standard leaves the behaviour undefined when a program
 * adds new function templates to namespace std; consider moving this helper
 * into a project namespace once its callers can be updated.
 */
template <size_t I, class T>
auto get(T& value) -> T&
{
    return value;
}
}
namespace Reducers
{
/**
 * Number of reducers bundled in an accumulator object: 1 for anything that is
 * not a pair or tuple, 2 for a std::pair, the tuple size for a std::tuple.
 */
template <class T>
struct num_elements
{
    enum { value = 1 };
};
template <class A, class B>
struct num_elements<std::pair<A,B>>
{
    enum { value = 2 };
};
template <class... A>
struct num_elements<std::tuple<A...>>
{
    enum { value = std::tuple_size<std::tuple<A...>>::value };
};

/**
 * Applies every accumulator stored in r.accumulate to the matching slot of
 * `accumulated`, feeding each of them the same `value`.
 *
 * Each level of the inheritance chain handles slot CurrentNumReducer-1. Base
 * class constructors run first, so slots are updated in increasing order.
 */
template <class R, class V, size_t TotalNumReducers, size_t CurrentNumReducer>
struct accumulate_all : accumulate_all<R,V,TotalNumReducers,CurrentNumReducer-1>
{
    typedef typename R::unitary_type U;
    explicit accumulate_all(R& r, V& value, U& accumulated)
    : accumulate_all<R,V,TotalNumReducers,CurrentNumReducer-1>(r, value, accumulated)
    {
        std::get<CurrentNumReducer-1>(accumulated) = std::get<CurrentNumReducer-1>(r.accumulate)(std::get<CurrentNumReducer-1>(accumulated), value);
    }
};
/** End of the chain for a bundle of several reducers: handles slot 0. */
template <class R, class V, size_t TotalNumReducers>
struct accumulate_all<R,V,TotalNumReducers,1>
{
    typedef typename R::unitary_type U;
    explicit accumulate_all(R& r, V& value, U& accumulated)
    {
        std::get<0>(accumulated) = std::get<0>(r.accumulate)(std::get<0>(accumulated), value);
    }
};
/** Single reducer: the accumulator is called directly, without std::get. */
template <class R, class V>
struct accumulate_all<R,V,1,1>
{
    typedef typename R::unitary_type U;
    explicit accumulate_all(R& r, V& value, U& accumulated)
    {
        accumulated = r.accumulate(accumulated, value);
    }
};

/**
 * Entry point of the accumulate_all chain: constructing an accumulator folds
 * one `value` into `accumulated` using all reducers held by `r`.
 */
template <class R, class V>
struct accumulator : accumulate_all<R,V,R::num_reducers,R::num_reducers>
{
    typedef typename R::unitary_type U;
    explicit accumulator(R& r, V& value, U& accumulated)
    : accumulate_all<R,V,R::num_reducers,R::num_reducers>(r, value, accumulated)
    {
    }
};

/**
 * A reduction described by its starting value(s) and accumulation step(s).
 *
 * @tparam U type of the starting value, which is also the result type
 * @tparam A accumulator: a callable, or a pair/tuple of callables
 * @tparam N number of reducers bundled in A
 */
template <class U, class A, size_t N>
struct Reducer
{
    using unitary_type = U;
    using accumulate_type = A;
    static constexpr size_t num_reducers = N;
    using type = Reducer<U,A,N>;

    /**
     * Takes both arguments by value and moves them into place, so the reducer
     * can be built from lvalues and rvalues alike.
     */
    Reducer(U _unitary, A _accumulate)
    : unitary(std::move(_unitary)), accumulate(std::move(_accumulate))
    {
    }

    /**
     * Folds the range [current, end) into `accumulated`.
     *
     * Implemented as a loop so that stack usage does not grow with the length
     * of the range.
     *
     * @param current     iterator to the first element
     * @param end         iterator one past the last element
     * @param accumulated value the fold starts from
     * @return the accumulated result; `accumulated` unchanged for an empty range
     */
    template <class It>
    U reduce(It current, It end, U accumulated)
    {
        // Deduce the element type from what dereferencing yields, so that
        // iterators over const elements (e.g. from a const container) work.
        using V = typename std::remove_reference<decltype(*current)>::type;
        for (; !(current == end); ++current)
            accumulator<type,V>(*this, *current, accumulated);
        return accumulated;
    }

    /**
     * Folds every element of the collection pointed to by c, starting from
     * the stored unitary value.
     */
    template <class C>
    U reduce(C const * const c)
    {
        return reduce(c->begin(), c->end(), unitary);
    }

    U unitary;
    A accumulate;
};

/**
 * Builds a Reducer from a starting value and an accumulator (or a pair/tuple
 * of each).
 *
 * The argument types are decayed before being used as the Reducer's template
 * arguments, so that passing lvalues stores copies instead of references and
 * a pair/tuple passed as an lvalue is still recognised as a bundle.
 */
template <class U, class A>
auto custom(U&& unitary, A&& accumulate)
    -> Reducer<typename std::decay<U>::type,
               typename std::decay<A>::type,
               num_elements<typename std::decay<A>::type>::value>
{
    using Unit = typename std::decay<U>::type;
    using Acc = typename std::decay<A>::type;
    return Reducer<Unit,Acc,num_elements<Acc>::value>(std::forward<U>(unitary), std::forward<A>(accumulate));
}
}
using Reducers::Reducer;
using Reducers::custom;
/**
 * Non-owning view over a collection that offers reductions over its elements.
 *
 * @tparam C collection type; must provide value_type, begin() and end()
 */
template <class C>
struct Stream
{
    /** Stores the pointer only; the collection must outlive the stream. */
    Stream(C const * const _c) : c(_c) {}

    /** Runs a single reducer over the collection and returns its result. */
    template <class R>
    auto reduce(R& r) -> typename R::unitary_type
    {
        return r.reduce(c);
    }

    /**
     * Runs two reducers over the collection.
     *
     * @return pair holding the result of r1 and the result of r2
     */
    template <class R1, class R2>
    auto reduce(R1& r1, R2& r2) -> std::pair<typename R1::unitary_type,typename R2::unitary_type>
    {
        return custom(std::make_pair(r1.unitary, r2.unitary), std::make_pair(r1.accumulate, r2.accumulate)).reduce(c);
    }

    /**
     * Runs any number of reducers over the collection (reducers are taken by
     * value here).
     *
     * @return tuple holding one result per reducer, in argument order
     */
    template <class...Rs>
    auto reduce(Rs...rs) -> std::tuple<typename Rs::unitary_type...>
    {
        return custom(std::make_tuple(rs.unitary...), std::make_tuple(rs.accumulate...)).reduce(c);
    }

    /** @return sum of all elements; T(0) for an empty collection */
    auto sum() -> typename C::value_type
    {
        typedef typename C::value_type T;
        // Seed with T(0) rather than the int literal 0 so that the running
        // value has the element type for every T, not only for int.
        return custom(T(0), [](const T& r, const T& v) { return r+v; }).reduce(c);
    }

    /** @return product of all elements; T(1) for an empty collection */
    auto prod() -> typename C::value_type
    {
        typedef typename C::value_type T;
        // Seed with T(1) for the same reason as in sum().
        return custom(T(1), [](const T& r, const T& v) { return r*v; }).reduce(c);
    }

    /**
     * @return running mean of all elements computed in double precision;
     *         0.0 for an empty collection
     */
    double double_mean()
    {
        typedef typename C::value_type T;
        // State is (mean so far, 1-based position of the next element).
        auto result = custom(std::make_pair(0.0,size_t(1)), [](std::pair<double,size_t> r, const T& v)
        {
            auto delta = v - r.first;
            r.first += delta/r.second++;
            return r;
        }).reduce(c);
        return result.first;
    }

    C const * const c;
};
/**
 * Abstract base for containers that can be turned into a Stream.
 *
 * Derived classes supply begin() and end(). Both are const member functions
 * yet return iterators that dereference to a non-const reference.
 */
template <class T>
struct Collection
{
    /**
     * Bidirectional iterator that wraps a raw pointer to V.
     *
     * The iterator member types are spelled out directly instead of being
     * inherited from std::iterator, which is deprecated since C++17.
     */
    template <class V>
    struct iterator
    {
        using iterator_category = std::bidirectional_iterator_tag;
        using value_type = V;
        using difference_type = std::ptrdiff_t;
        using pointer = V*;
        using reference = V&;

        explicit iterator(V* _ptr) : ptr(_ptr) {}
        iterator& operator++() { ++ptr; return *this; }
        iterator operator++(int) { auto ret = *this; ++(*this); return ret; }
        iterator& operator--() { --ptr; return *this; }
        iterator operator--(int) { auto ret = *this; --(*this); return ret; }
        // Comparisons take const references so that temporaries and const
        // iterators can be compared as well.
        friend bool operator==(const iterator& first, const iterator& second)
        {
            return first.ptr == second.ptr;
        }
        friend bool operator!=(const iterator& first, const iterator& second)
        {
            return !(first == second);
        }
        V& operator*() const { return *ptr; }
        V* ptr;
    };
    using value_type = T;
    using iterator_type = iterator<T>;

    // Polymorphic base: destroying a derived object through a Collection
    // pointer requires a virtual destructor.
    virtual ~Collection() = default;

    /** @return a Stream viewing this collection (stores a pointer to it) */
    Stream<Collection<T>> stream() const
    {
        return Stream<Collection<T>>(this);
    }
    /** @return iterator to the first element */
    virtual iterator<T> begin() const = 0;
    /** @return iterator one past the last element */
    virtual iterator<T> end() const = 0;
};
/**
 * Fixed-size Collection backed by a heap array, filled from an initializer
 * list at construction.
 */
template <class T>
struct Vector : public Collection<T>
{
    using iterator_type = typename Collection<T>::iterator_type;

    /** Allocates list.size() elements and copies the list into them. */
    Vector(std::initializer_list<T> list)
    : vec(std::make_unique<T[]>(list.size())), vlen(list.size())
    {
        T* const destination = vec.get();
        std::copy(list.begin(), list.end(), destination);
    }

    /** @return iterator to the first stored element */
    iterator_type begin() const override
    {
        T* const first = vec.get();
        return iterator_type(first);
    }

    /** @return iterator one past the last stored element */
    iterator_type end() const override
    {
        T* const past_last = vec.get() + vlen;
        return iterator_type(past_last);
    }

    std::unique_ptr<T[]> vec; // owned storage
    size_t vlen;              // number of elements in vec
};
int main()
{
Vector<int> v({1,2,3,4,5,6,7,8,9,10});
// basic reduction
std::cout << "off-the-shelf ----------" << std::endl;
std::cout << "sum = " << v.stream().sum() << std::endl;
std::cout << "prod = " << v.stream().prod() << std::endl;
std::cout << "mean = " << v.stream().double_mean() << std::endl;
// custom and complex reductions
auto r1 = custom(0, [](int r, int v) { return r+v; });
auto r2 = custom(1, [](int r, int v) { return r*v; });
auto r3 = custom(std::make_pair(0.0,1ul), [](std::pair<double,size_t> r, int v)
{
auto delta = v - r.first;
r.first += delta/r.second++;
return r;
});
auto result = v.stream().reduce(r1);
auto result2 = v.stream().reduce(r1, r2);
auto result3 = v.stream().reduce(r1, r2, r3);
std::cout << "custom reducers --------" << std::endl;
std::cout << "sum = " << std::get<0>(result) << std::endl;
std::cout << "sum, prd = " << std::get<0>(result2) << ", " << std::get<1>(result2) << std::endl;
std::cout << "sum, prod, mean = " << std::get<0>(result3) << ", " << std::get<1>(result3) << ", " << std::get<2>(result3).first << std::endl;
return 0;
}
off-the-shelf ----------
sum = 55
prod = 3628800
mean = 5.5
custom reducers --------
sum = 55
sum, prd = 55, 3628800
sum, prod, mean = 55, 3628800, 5.5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment