achille

## gist:23680e9100db87565a8e67038797b27d
Context:
https://gist.github.com/achille/d1eadf82aa54056b9ded7706e8f56760
https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07

Claude Benchmark:
Config: Moderate (150,100) - CORRECT SOLUTIONS ONLY
────────────────────────────────────────────────────────────────────────────────
  Method                  Time       max_err    wrong/20000
  ─────────────────────────────────────────────────────────────────────────────
  faded_opt (2 iter)      30.1 ns    0.18       0  ✅

## gist:c031865213fd8e2f530fb528a0073613
Context:
https://gist.github.com/achille/d1eadf82aa54056b9ded7706e8f56760

ChatGPT Pro run:
https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07

Benchmarks by Claude:

Method                    Time     Iters  sqrt/iter  Accurate?  Notes
  -------------------------------------------------------------------------------

## gist:d1eadf82aa54056b9ded7706e8f56760
Context:
https://news.ycombinator.com/reply?id=46483541

Implementations:
https://claude.ai/share/428502a2-81f0-421d-a3d7-08bceb96d039
https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07


Eval:
https://claude.ai/share/98dc0a03-0d03-4053-8460-3fb7b2b2676d

## compactness.js
// compactness() calculates how closely the resulting documents are located together
// It counts the size of the documents vs size of the unique pages they reside on

function compactness(collection, query, limit) {
    Object.size = function(o) { var size = 0, key;
        for (key in o) { if (o.hasOwnProperty(key)) size++; }
        return size; };

    count=0;
    size=0;

## max_staleness.py
import numpy as np
import scipy.optimize as spo
# Helper: round n and convert the result to a plain int.
ir = lambda n: int(round(n))

# Constants
s_freq = 500   # server-to-server heartbeat frequency (ms)
c_freq = 10000 # client-to-server heartbeat frequency (ms)

# Simulation boundaries
threshold = 300000 # maximum lag/skew: 5 minutes, expressed in ms

## telescoping_delete.js
/*
 * Auto-tuning delete that allows for removal of large amounts of data
 * without impacting performance. Configurable to a target load amount.
 *
 * How it works:
 * TL;DR: Delete a small slice every second; Vary the size of each slice
 *        based on how long the previous delete took; sleep; repeat.
 *
 * TODO: Modify this to allow for deletion based on the ObjectId's timestamp,
 *       which is embedded in its first four bytes.

## check_keyspace.js
/* Check for gaps or duplicates in keyspace */

function check_keyspace(ns) {
    print("Checking: " + ns);
    str = JSON.stringify
    forwardCount=0;
    reverseCount=0;
    min = db.chunks.find(ns).pretty().sort({min: 1}).limit(1)[0]
    max = db.chunks.find(ns).pretty().sort({min:-1}).limit(1)[0]
    current = min

## piazza_intro_joyner_standardized.md

      
              1 file
            
          
              1 fork
            
          
                0 comments
              
            
              0 stars
            
          
                achille
                / piazza_intro_joyner_standardized.md
            
            
              Last active
              January 11, 2018 15:48
            
          
    Howdy folks
This is an attempt at standardizing the intro threads, and instructions on how to include a student map in them.
Note the map & question set may be added to existing Piazza threads.
Steps:
Create a Google Map

Login to Google Maps, create a new Map
Menu -> Your Places -> Maps -> Create Map


## vitter.py
"""
Vitter JS (1987) 'An efficient algorithm for sequential
random sampling.' ACM T. Math. Softw. 13(1): 58--67.

Copied from: https://gist.github.com/ldoddema/bb4ba2d4ad1b948a05e0
"""

from math import exp, log
import random
import numpy as np

## enqueue.js
// Demo setup: create a ~1 GB collection, then enqueue it for processing.
// Inserts 1000 documents into db.foo, each with a padded string field `f`
// (presumably 1024*1024 'A' characters via the shell's String pad helper; TODO confirm).
for(i=0;i<1000;i++){db.foo.insert({f:''.pad(1024*1024,true,'A')})}
// Enqueue work for the "test.foo" namespace using enqueueWork's default split size.
enqueueWork("test.foo")

// Each worker calls dequeue() and works on its own range.
work = dequeue("test.foo")


function enqueueWork(ns,splitSizeBytes=320000000){
    split = db.runCommand({splitVector:ns, keyPattern:{_id: 1},
	Context:
	https://gist.github.com/achille/d1eadf82aa54056b9ded7706e8f56760
	https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07

	Claude Benchmark:
	Config: Moderate (150,100) - CORRECT SOLUTIONS ONLY
	────────────────────────────────────────────────────────────────────────────────
	Method Time max_err wrong/20000
	─────────────────────────────────────────────────────────────────────────────
	faded_opt (2 iter) 30.1 ns 0.18 0 ✅
	Context:
	https://gist.github.com/achille/d1eadf82aa54056b9ded7706e8f56760

	ChatGPT Pro run:
	https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07

	Benchmarks by Claude:

	Method Time Iters sqrt/iter Accurate? Notes
	-------------------------------------------------------------------------------
	Context:
	https://news.ycombinator.com/reply?id=46483541

	Implementations:
	https://claude.ai/share/428502a2-81f0-421d-a3d7-08bceb96d039
	https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07


	Eval:
	https://claude.ai/share/98dc0a03-0d03-4053-8460-3fb7b2b2676d
	// compactness() calculates how closely the resulting documents are located together
	// It counts the size of the documents vs size of the unique pages they reside on

	function compactness(collection, query, limit) {
	Object.size = function(o) { var size = 0, key;
	for (key in o) { if (o.hasOwnProperty(key)) size++; }
	return size; };

	count=0;
	size=0;
	import numpy as np
	import scipy.optimize as spo
	ir = lambda n: int(round(n))

	# Constants
	s_freq = 500 #server-server heartbeat frequency ms
	c_freq = 10000 #client-server heartbeat frequency ms

	# Simulation boundaries
	threshold = 300000 # 5 minute max lag/skew
	/*
	* Auto-tuning delete that allows for removal of large amounts of data
	* without impacting performance. Configurable to a target load amount.
	*
	* How it works:
	* TL;DR: Delete a small slice every second; Vary the size of each slice
	* based on how long the previous delete took; sleep; repeat.
	*
	* TODO: Modify this to allow for deletion based on the ObjectId's timestamp,
	* which is embedded in its first four bytes.
	/* Check for gaps or duplicates in keyspace */

	function check_keyspace(ns) {
	print("Checking: " + ns);
	str = JSON.stringify
	forwardCount=0;
	reverseCount=0;
	min = db.chunks.find(ns).pretty().sort({min: 1}).limit(1)[0]
	max = db.chunks.find(ns).pretty().sort({min:-1}).limit(1)[0]
	current = min
	"""
	Vitter JS (1987) 'An efficient algorithm for sequential
	random sampling.' ACM T. Math. Softw. 13(1): 58--67.

	Copied from: https://gist.github.com/ldoddema/bb4ba2d4ad1b948a05e0
	"""

	from math import exp, log
	import random
	import numpy as np
	//Create 1gb collection & enqueue
	for(i=0;i<1000;i++){db.foo.insert({f:''.pad(1024*1024,true,'A')})}
	enqueueWork("test.foo")

	//Each worker calls dequeue() and works on its own range
	work = dequeue("test.foo")


	function enqueueWork(ns,splitSizeBytes=320000000){
	split = db.runCommand({splitVector:ns, keyPattern:{_id: 1},