@Hermann-SW
Last active January 5, 2026 18:34

Determine pi_{k^2+(k+1)^2}(10^n) (for all k<=10^n)
/* NOLINT(legal/copyright)
f=parallel_dfs_bfs_evenly
g++ $f.cc -O3 -Wall -pedantic -Wextra -fopenmp -o $f
cpplint $f.cc
cppcheck --enable=all --suppress=missingIncludeSystem $f.cc --check-config
*/
#include <string.h>
#include <omp.h>
#include <cstdlib>  // atoi()
#include <iostream>
#include <cassert>
#include <limits>
#include <tuple>
#include <queue>
typedef __int64_t num;
typedef __int128_t num2;
num c;
char *V;  // sieve array: V[v] == 1 marks v^2+(v+1)^2 as composite
std::queue<std::tuple<num, num, bool, bool>> *Q;  // one work queue per thread
const bool lft = false;
const bool rgt = true;
/* Longest directed acyclic graph path is that for p=5 (3,6,8,11,...).
Path has 40% of 10^n vertices, so DAG height is 40 billion for n=11.
0 5 10 15 20 25 30
+ - - - - + - - - - + - - - - + - - - - + - - - - + - - - - +...
0+ _____3___ ________________________________20_..
! ^ \___ _____8__ _.. (*)
! ! ^ 6 \___ ____13__ _..
! ! ! \______ 11 \___ ____18__
! {3,5,lft,true} ! \ \.. 16 \___ _..
5+ ! 10_.. (@) \.. 21
! {6,5,rgt,true}--+ (#) \____ \..
! ^ \___
! {15,13,rgt,true}---------------+ \ _.. (*)
! 15 {20,29,lft,false}
10+ (#) 10²+11² == 14²+5² => 221 composite \..
. (@) 16²+17² == 23²+4² => 545 composite
\
(left half of) great-grandparent DAG #
https://oeis.org/A000071 3 (=5²) / \
6 / \
10 8 / \
15 23 11 20 (=29²) # #
21 23 27 41 13 37 / \ / \
28 52 28 61 40 88 64 16 59 49 # #
36......96..........106......92..........119 (=169²) / \ / \
#
/ \ */
num other(num v, num p) {
  // p divides v^2+(v+1)^2 = 2*v*(v+1)+1; return the cofactor, capped at the
  // maximum num value (the 128-bit intermediate avoids overflow for large v)
  num2 x = v;
  x = 2*x*(x+1)+1;
  assert(x%p == 0);
  x /= p;
  if (x > std::numeric_limits<num>::max()) {
    return std::numeric_limits<num>::max();
  }
  return x;
}
num next(num v, num p) {
  // smallest w > v with p dividing w^2+(w+1)^2, i.e. the next vertex on the p-path
  num r = v%p;
  return v + (2*r < p ? p-2*r-1 : 2*(p-r)-1);
}
int main(int argc, char *argv[]) {
  assert(argc == 2);
  assert(atoi(argv[1]) >= 4);
  num mx = 1;
  for (int e = 1; e <= atoi(argv[1]); ++e) {
    mx *= 10;
  }
  V = new char[1 + mx];
  assert(V);
  explicit_bzero(V, 1+mx);
  int nproc = omp_get_max_threads();
  omp_set_num_threads(nproc);
  Q = new std::queue<std::tuple<num, num, bool, bool>>[nproc];
  assert(Q);
  __int64_t five = 1;  // shared p=5 job counter; {3, 5, lft, true}
  _Pragma("omp parallel for")
  for (int i = 0; i < nproc; ++i) {
    bool done = false;
    for (;;) {
      // drain this thread's local work queue (BFS part)
      while (!Q[i].empty()) {
        num v = std::get<0>(Q[i].front());
        num p = std::get<1>(Q[i].front());
        bool l = std::get<2>(Q[i].front());
        bool b = std::get<3>(Q[i].front());
        Q[i].pop();
        if (v > mx) continue;
        V[v] = 1;
        num q = other(v, p);
        if (l == lft) {
          if (q != p) Q[i].push({next(v, q), q, lft, false});
          Q[i].push({next(v, p), p, rgt, b});
        } else {
          Q[i].push({next(v, p), p, lft, false});
          if (b) Q[i].push({next(v, q), q, rgt, true});
        }
      }
      // fetch the next start vertex on the p=5 path via the shared counter
      __int64_t job;
      #pragma omp atomic capture
      job = five++;
      done = (5*(job >> 1) >= mx);
      if (!done) {
        num v = 5*(job >> 1) + 1 + 2*(job%2);
        num p = 5;
        bool l = (job%2 == 1);
        bool b = (job < 3);
        if (v > mx) {
          done = true;
        } else {
          V[v] = 1;
          num q = other(v, p);
          if (l == lft) {
            if (q != p) Q[i].push({next(v, q), q, lft, false});
          } else {
            if (b) Q[i].push({next(v, q), q, rgt, true});
          }
        }
      }
      if (done) break;
    }  // for(;;)
    // if (done) { std::cout << i << " done " << five << std::endl; continue; }
  }  // _Pragma("omp parallel for")
  num c = mx, i;
  for (i = 1; i <= mx; ++i) c -= V[i];  // count k <= 10^n not marked composite
  std::cout << c << "\n";
  return 0;
}
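
A quick standalone illustration (an editorial sketch, not part of the gist) of the p=5 path described in the header comment: other() and next() are taken from the code above (with std::int64_t standing in for __int64_t), and the loop simply prints the first path vertices 3, 6, 8, 11, ... together with the cofactor of 5 in v²+(v+1)² at each vertex.

/* sketch only; build e.g. with: g++ p5_path_demo.cc -O3 -Wall -o p5_path_demo */
#include <cassert>
#include <cstdint>
#include <iostream>
#include <limits>
typedef std::int64_t num;   // stands in for the gist's __int64_t
typedef __int128_t num2;    // GCC built-in 128-bit type, as in the gist
num other(num v, num p) {   // as in the gist: cofactor of p in v^2+(v+1)^2
  num2 x = v;
  x = 2*x*(x+1)+1;
  assert(x%p == 0);
  x /= p;
  if (x > std::numeric_limits<num>::max()) {
    return std::numeric_limits<num>::max();
  }
  return x;
}
num next(num v, num p) {    // as in the gist: next vertex on the p-path
  num r = v%p;
  return v + (2*r < p ? p-2*r-1 : 2*(p-r)-1);
}
int main() {
  num v = 3;                // first vertex of the p=5 start work
  for (int i = 0; i < 8; ++i) {
    num s = 2*v*(v+1)+1;    // v^2 + (v+1)^2
    std::cout << v << ": " << s << " = 5 * " << other(v, 5) << "\n";
    v = next(v, 5);
  }
  return 0;
}

The first output lines are "3: 25 = 5 * 5", "6: 85 = 5 * 17" and "8: 145 = 5 * 29"; note that v=3 is the only vertex where the cofactor equals 5 itself (25 = 5²), presumably the reason for the if (q != p) guards in main().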

Hermann-SW commented Jan 5, 2026

Follow-up to this gist:
https://gist.github.com/Hermann-SW/94241e719dd3518a3276d804b627319d

The mixed DFS/BFS algorithm should keep the workload highly parallel for much longer than the previous gist's approach.

Ongoing run on an 8-socket 192C/384T Lenovo x3950-X6 server with Intel Xeon 8890v4 CPUs.

Started with nohup ./doit2:

hermann@x3950-X6:~$ cat doit2
#!/bin/bash
time numactl -C 0-191 ./parallel_dfs_bfs_evenly 11
hermann@x3950-X6:~$ 

After 16 minutes of runtime (46h of user time already), the parallel_dfs_bfs_evenly process
uses 125.9g resident and 137.4g virtual memory and is still running at 19200 %CPU.


Hermann-SW commented Jan 5, 2026

After 21 minutes of runtime (60h of user time) the process uses 130.3g resident and 141.8g virtual memory.
Still around 19200 %CPU.


Hermann-SW commented Jan 5, 2026

After 50 minutes (6d+13h (!) of user time):
164.9g resident, 177.0g virtual memory, still 19200 %CPU (distributing work evenly seems to work).

@Hermann-SW

Babysitting the run was useful. After 1:04h (7d+21h of user time):
192.6g resident, 205.8g virtual memory for parallel_dfs_bfs_evenly.
Now only around 1100 %CPU is used, so only 11 cores still have work to do.

@Hermann-SW

After 6h wall-clock time (8d+16h user time):
354.8g resident, 369.2g virtual memory, now with 300 %CPU.

@Hermann-SW

After 10h of wall-clock time (9d+1h user time):
351.8g resident, 366.3g virtual memory, now with only 200 %CPU, so only two cores are left with work.

@Hermann-SW

Revision 2 is reformatting (indentation) only, as a basis for future work.

@Hermann-SW

Revision 3 adds documentation on the great-grandparent directed acyclic graph that gets traversed.
For n=11 the height of that DAG is 40 billion ...


Hermann-SW commented Jan 5, 2026

After 14:37h of wall-clock time (9d+10h of user time) I stopped the computation,
because it was already longer than the 14:28h that par.cc of the other gist needed.
Resident memory 351.8g, virtual memory 366.3g, still 200 %CPU.

Why did it take longer, even though all 192 cores were kept actively working for much longer than before?
par.cc did a sequential DFS of height 16 first and created 2090 (smaller) units of work.
parallel_dfs_bfs_evenly.cc split all work off the p=5 path in the DAG.
One core got work item {6,17,rgt,true}, split {10,17,lft,false} from it,
and then split {15,13,rgt,true} from that.
So that core handles the p=13 and p=17 paths alone, with roughly 2/13*10^11 and 2/17*10^11 vertices.
These two work items alone account for 27,149,321,267 vertices to be processed by a single core :-(
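
For a quick check of that figure (an editorial sketch, not part of the gist): the vertices v with 13 | v²+(v+1)² fall into the two residue classes v ≡ 2, 10 (mod 13), and those with 17 | v²+(v+1)² into v ≡ 6, 10 (mod 17), so both path lengths up to 10^11 can be counted directly:

#include <cstdint>
#include <iostream>
typedef std::int64_t num;
// number of v in [1, n] with v % p == r (arithmetic progression count)
num count_residue(num n, num p, num r) {
  return (r >= 1 && r <= n) ? (n - r) / p + 1 : 0;
}
int main() {
  const num n = 100000000000LL;  // 10^11
  // 13 | v^2+(v+1)^2  <=>  v = 2 or 10 (mod 13):  2^2+3^2 = 13,  10^2+11^2 = 221 = 13*17
  num c13 = count_residue(n, 13, 2) + count_residue(n, 13, 10);
  // 17 | v^2+(v+1)^2  <=>  v = 6 or 10 (mod 17):  6^2+7^2 = 85 = 5*17
  num c17 = count_residue(n, 17, 6) + count_residue(n, 17, 10);
  std::cout << c13 << " + " << c17 << " = " << c13 + c17 << "\n";
  return 0;
}

This prints 15384615385 + 11764705882 = 27149321267, in line with the 2/13*10^11 + 2/17*10^11 estimate.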

The solution will be to efficiently determine whether other cores have run out of work.
This can be checked efficiently and without an atomic operation via (5*(five >> 1) >= mx),
which is true once no more work can be generated along the 40-billion-vertex p=5 path.
A core detecting that will pass work, in a safe way, to a core that has run out of work
and then continue its own computation. That way all 192 cores should keep working the whole time.
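
To make that check concrete (an editorial sketch, not part of the gist): the shared counter five hands out job numbers that map to start vertices 3, 6, 8, 11, ... on the p=5 path exactly as in main(), so once 5*(five >> 1) >= mx holds, every job still to be handed out would start beyond mx:

#include <cstdint>
#include <iostream>
int main() {
  const std::int64_t mx = 100;  // small bound for illustration, instead of 10^n
  for (std::int64_t job = 1; ; ++job) {  // values a thread would obtain via five++
    if (5*(job >> 1) >= mx) {  // same cutoff as "done" in main()
      std::cout << "job " << job << ": no more p=5 start work below mx\n";
      break;
    }
    std::int64_t v = 5*(job >> 1) + 1 + 2*(job % 2);  // start vertex 3, 6, 8, 11, 13, ...
    std::cout << "job " << job << " -> v = " << v << "\n";
  }
  return 0;
}

Because a stale (non-atomic) read of five can only be smaller than its current value, a core that already sees 5*(five >> 1) >= mx can trust the condition, which is presumably why no atomic operation is needed for this check.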

The array of size 1+10^n is needed for sieving, but the work items get stored as two __int128_t and two bools.
__int128_t is only needed inside num other(num, num), to avoid overflow in its computation for n≥10,
but not outside, so the 351.8g of memory should get reduced to only about 225g while doing the same computation.
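
A rough size comparison of a single work item (an editorial sketch; exact sizes are ABI dependent, the values below are what a typical x86-64 GCC build gives):

#include <cstdint>
#include <iostream>
#include <tuple>
int main() {
  // work item as two 128-bit integers plus two bools (as described above)
  std::cout << sizeof(std::tuple<__int128_t, __int128_t, bool, bool>) << "\n";      // typically 48
  // work item as two 64-bit integers plus two bools (the planned layout)
  std::cout << sizeof(std::tuple<std::int64_t, std::int64_t, bool, bool>) << "\n";  // typically 24
  return 0;
}

Halving the per-item size roughly halves the queue memory; together with the unchanged 1+10^n byte sieve array this is where the projected drop from 351.8g towards 225g comes from.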


Hermann-SW commented Jan 5, 2026

Revision 4 turns the num type into a 64-bit integer; the new num2 type is a 128-bit integer, used only inside num other(num, num).
