Part 1 Code Listing
class Loss {
    // Used for regression problems where the goal is to predict continuous values.
    // Penalizes larger errors more heavily due to the squaring, making it sensitive to outliers.
    static meanSquaredError(predictions, targets) {
        let sum = 0;
        for (let i = 0, size = predictions.length; i < size; i++) {
            const diff = predictions[i] - targets[i];
            sum += diff * diff;
        }
        return sum / predictions.length;
    }
    static meanSquaredErrorDerivative(predictions, targets) {
        const derivatives = new Array(predictions.length);
        for (let i = 0; i < predictions.length; i++) {
            derivatives[i] = 2 * (predictions[i] - targets[i]) / predictions.length;
        }
        return derivatives;
    }
    // Also used for regression problems, but less sensitive to outliers than MSE.
    // Treats all errors linearly, giving equal weight to small and large errors.
    static meanAbsoluteError(predictions, targets) {
        let sum = 0;
        for (let i = 0; i < predictions.length; i++) {
            sum += Math.abs(predictions[i] - targets[i]);
        }
        return sum / predictions.length;
    }
    static meanAbsoluteErrorDerivative(predictions, targets) {
        const derivatives = new Array(predictions.length);
        for (let i = 0; i < predictions.length; i++) {
            const diff = predictions[i] - targets[i];
            derivatives[i] = (diff > 0 ? 1 : diff < 0 ? -1 : 0) / predictions.length;
        }
        return derivatives;
    }
    // Used for binary classification problems (e.g., output is 0 or 1).
    // Assumes predictions are probabilities (typically from a sigmoid activation).
    // Measures the divergence between predicted probabilities and true binary labels.
    static binaryCrossEntropy(predictions, targets) {
        let sum = 0;
        for (let i = 0; i < predictions.length; i++) {
            // Clamp predictions away from exactly 0 and 1 to avoid log(0).
            const p = Math.max(1e-7, Math.min(1 - 1e-7, predictions[i]));
            sum += -(targets[i] * Math.log(p) + (1 - targets[i]) * Math.log(1 - p));
        }
        return sum / predictions.length;
    }
    static binaryCrossEntropyDerivative(predictions, targets) {
        const derivatives = new Array(predictions.length);
        for (let i = 0; i < predictions.length; i++) {
            const p = Math.max(1e-7, Math.min(1 - 1e-7, predictions[i]));
            derivatives[i] = (-(targets[i] / p) + (1 - targets[i]) / (1 - p)) / predictions.length;
        }
        return derivatives;
    }
}
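// Not part of the original listing: a quick worked check of the losses above.
// For predictions [0.5, 0.5] against targets [0, 1], MSE is
// (0.5^2 + 0.5^2) / 2 = 0.25, and its per-element derivative is
// 2 * (prediction - target) / n, giving [0.5, -0.5]. Uncomment to verify:
// console.log(Loss.meanSquaredError([0.5, 0.5], [0, 1]));           // 0.25
// console.log(Loss.meanSquaredErrorDerivative([0.5, 0.5], [0, 1])); // [0.5, -0.5]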
class Neuron {
    weights = null;
    bias = null;
    constructor(numberOfInputs) {
        // Initialize weights and bias to random values in [-1, 1).
        this.weights = new Array(numberOfInputs);
        for (let i = 0; i < numberOfInputs; ++i) {
            this.weights[i] = Math.random() * 2 - 1;
        }
        this.bias = Math.random() * 2 - 1;
    }
    // Weighted sum of the inputs plus the bias (no activation here).
    forward(inputs) {
        let sum = 0.0;
        for (let i = 0, size = inputs.length; i < size; ++i) {
            sum += this.weights[i] * inputs[i];
        }
        sum += this.bias;
        return sum;
    }
    // Gradients of the loss with respect to this neuron's own parameters.
    #calculateParameterGradients(inputs, neuronGradient) {
        const gradients = {
            bias: 0,
            weights: new Array(this.weights.length)
        };
        gradients.bias = neuronGradient;
        for (let i = 0, size = this.weights.length; i < size; i++) {
            gradients.weights[i] = neuronGradient * inputs[i];
        }
        return gradients;
    }
    // Gradient descent step: move each parameter against its gradient.
    #updateWeights(gradients, learningRate) {
        for (let i = 0; i < this.weights.length; i++) {
            this.weights[i] -= learningRate * gradients.weights[i];
        }
        this.bias -= learningRate * gradients.bias;
    }
    // Gradients of the loss with respect to this neuron's inputs,
    // passed back to the previous layer.
    #calculateInputGradients(neuronGradient) {
        const inputGradients = new Array(this.weights.length);
        for (let i = 0; i < this.weights.length; i++) {
            inputGradients[i] = neuronGradient * this.weights[i];
        }
        return inputGradients;
    }
    backward(inputs, neuronGradient, learningRate) {
        const parameterGradients = this.#calculateParameterGradients(inputs, neuronGradient);
        // Compute input gradients before the update so they use the pre-step weights.
        const inputGradients = this.#calculateInputGradients(neuronGradient);
        this.#updateWeights(parameterGradients, learningRate);
        return inputGradients;
    }
}
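// Not part of the original listing: a minimal sketch of one training step on a
// single hand-initialized neuron (these weight/bias values are arbitrary).
// forward([1, 2]) computes 0.5*1 + (-0.5)*2 + 0.1 = -0.4. backward() with a
// gradient of 1 returns the input gradients from the pre-update weights, then
// applies weights[i] -= learningRate * gradient * inputs[i].
// const demoNeuron = new Neuron(2);
// demoNeuron.weights = [0.5, -0.5];
// demoNeuron.bias = 0.1;
// console.log(demoNeuron.forward([1, 2]));          // -0.4
// console.log(demoNeuron.backward([1, 2], 1, 0.1)); // [0.5, -0.5]
// console.log(demoNeuron.weights, demoNeuron.bias); // approx. [0.4, -0.7] 0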
class DenseLayer {
    neurons = null;
    cachedInputs = null;
    constructor(numberOfInputs, numberOfOutputs) {
        this.neurons = new Array(numberOfOutputs);
        for (let i = 0; i < numberOfOutputs; ++i) {
            this.neurons[i] = new Neuron(numberOfInputs);
        }
        this.cachedInputs = new Array(numberOfInputs);
    }
    forward(inputs) {
        // Cache the inputs; backward() needs them to compute weight gradients.
        if (this.cachedInputs == null || this.cachedInputs.length !== inputs.length) {
            this.cachedInputs = new Array(inputs.length);
        }
        for (let i = 0, size = inputs.length; i < size; ++i) {
            this.cachedInputs[i] = inputs[i];
        }
        const outputs = new Array(this.neurons.length);
        for (let i = 0, size = this.neurons.length; i < size; ++i) {
            outputs[i] = this.neurons[i].forward(inputs);
        }
        return outputs;
    }
    backward(outputGradients, learningRate) {
        const inputGradients = new Array(this.cachedInputs.length);
        for (let i = 0; i < inputGradients.length; i++) {
            inputGradients[i] = 0;
        }
        // Each input feeds every neuron, so its gradient is the sum of the
        // gradients contributed by all neurons in the layer.
        for (let neuronIdx = 0; neuronIdx < this.neurons.length; neuronIdx++) {
            const neuron = this.neurons[neuronIdx];
            const neuronsInputGradients = neuron.backward(
                this.cachedInputs,
                outputGradients[neuronIdx],
                learningRate
            );
            for (let i = 0; i < neuronsInputGradients.length; i++) {
                inputGradients[i] += neuronsInputGradients[i];
            }
        }
        return inputGradients;
    }
}
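// Not part of the original listing: a shape check. A DenseLayer(2, 3) maps a
// length-2 input to a length-3 output (one value per neuron), and backward()
// returns a length-2 gradient. A learning rate of 0 makes it a pure gradient
// computation with no weight update.
// const demoLayer = new DenseLayer(2, 3);
// console.log(demoLayer.forward([1, 0]).length);        // 3
// console.log(demoLayer.backward([1, 1, 1], 0).length); // 2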
class ActivationLayer {
    kind = "relu";
    cachedInputs = null;
    constructor(layerType = "relu") {
        // Fall back to relu if an unknown activation name is given.
        if (layerType === "relu" || layerType === "sigmoid" || layerType === "tanh") {
            this.kind = layerType;
        }
    }
    #reluActivation(x) {
        return Math.max(0, x);
    }
    #sigmoidActivation(x) {
        return 1 / (1 + Math.exp(-x));
    }
    #tanhActivation(x) {
        return Math.tanh(x);
    }
    #reluDerivative(x) {
        return x > 0 ? 1 : 0;
    }
    #sigmoidDerivative(x) {
        const sig = this.#sigmoidActivation(x);
        return sig * (1 - sig);
    }
    #tanhDerivative(x) {
        const t = Math.tanh(x);
        return 1 - t * t;
    }
    forward(inputs) {
        // Cache the pre-activation inputs; backward() evaluates the derivatives at them.
        if (this.cachedInputs == null || this.cachedInputs.length !== inputs.length) {
            this.cachedInputs = new Array(inputs.length);
        }
        for (let i = 0, size = inputs.length; i < size; ++i) {
            this.cachedInputs[i] = inputs[i];
        }
        const output = new Array(inputs.length);
        if (this.kind === "relu") {
            for (let i = 0, size = inputs.length; i < size; ++i) {
                output[i] = this.#reluActivation(inputs[i]);
            }
        }
        else if (this.kind === "sigmoid") {
            for (let i = 0, size = inputs.length; i < size; ++i) {
                output[i] = this.#sigmoidActivation(inputs[i]);
            }
        }
        else if (this.kind === "tanh") {
            for (let i = 0, size = inputs.length; i < size; ++i) {
                output[i] = this.#tanhActivation(inputs[i]);
            }
        }
        else {
            return null;
        }
        return output;
    }
    backward(outputGradients) {
        // Chain rule: scale each incoming gradient by the activation's
        // derivative evaluated at the cached pre-activation input.
        const inputGradients = new Array(outputGradients.length);
        if (this.kind === "relu") {
            for (let i = 0; i < outputGradients.length; i++) {
                inputGradients[i] = outputGradients[i] * this.#reluDerivative(this.cachedInputs[i]);
            }
        }
        else if (this.kind === "sigmoid") {
            for (let i = 0; i < outputGradients.length; i++) {
                inputGradients[i] = outputGradients[i] * this.#sigmoidDerivative(this.cachedInputs[i]);
            }
        }
        else if (this.kind === "tanh") {
            for (let i = 0; i < outputGradients.length; i++) {
                inputGradients[i] = outputGradients[i] * this.#tanhDerivative(this.cachedInputs[i]);
            }
        }
        return inputGradients;
    }
}
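// Not part of the original listing: the analytic derivatives above can be
// checked against a central finite difference, (f(x + h) - f(x - h)) / (2h).
// Sketch for the sigmoid at x = 0.3; both values should be about 0.2445.
// const demoAct = new ActivationLayer("sigmoid");
// const h = 1e-5;
// const numeric = (demoAct.forward([0.3 + h])[0] - demoAct.forward([0.3 - h])[0]) / (2 * h);
// demoAct.forward([0.3]); // cache x = 0.3, then read the analytic derivative
// const analytic = demoAct.backward([1])[0];
// console.log(numeric, analytic); // ~0.2445 each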
// XOR truth table: the classic problem a single linear layer cannot solve.
const trainingData = [
    { input: [0, 0], target: [0] },
    { input: [0, 1], target: [1] },
    { input: [1, 0], target: [1] },
    { input: [1, 1], target: [0] }
];
// Network: 2 inputs -> 4 hidden tanh units -> 1 sigmoid output.
const layer1 = new DenseLayer(2, 4);
const activation1 = new ActivationLayer("tanh");
const layer2 = new DenseLayer(4, 1);
const activation2 = new ActivationLayer("sigmoid");
const learningRate = 0.1;
const epochs = 100000;
for (let epoch = 0; epoch < epochs; epoch++) {
    let totalLoss = 0;
    for (const data of trainingData) {
        // Forward pass.
        let output = layer1.forward(data.input);
        output = activation1.forward(output);
        output = layer2.forward(output);
        output = activation2.forward(output);
        const loss = Loss.meanSquaredError(output, data.target);
        totalLoss += loss;
        // Backward pass in reverse layer order; each dense layer updates
        // its own weights as the gradients flow through it.
        let gradients = Loss.meanSquaredErrorDerivative(output, data.target);
        gradients = activation2.backward(gradients);
        gradients = layer2.backward(gradients, learningRate);
        gradients = activation1.backward(gradients);
        gradients = layer1.backward(gradients, learningRate);
    }
    if (epoch % 1000 === 0) {
        console.log(`Epoch ${epoch}: Average Loss = ${totalLoss / trainingData.length}`);
    }
}
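// Note, not part of the original listing: this loop trains with MSE even
// though the output is a sigmoid probability. The binaryCrossEntropy pair
// defined above would be the more conventional drop-in for 0/1 targets:
// const loss = Loss.binaryCrossEntropy(output, data.target);
// let gradients = Loss.binaryCrossEntropyDerivative(output, data.target);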
console.log("\n=== Testing Trained Network ===");
for (const data of trainingData) {
    let output = layer1.forward(data.input);
    output = activation1.forward(output);
    output = layer2.forward(output);
    output = activation2.forward(output);
    console.log(`Input: [${data.input}] -> Output: ${output[0].toFixed(3)} (Target: ${data.target[0]})`);
}
function predict(input) {
    let output = layer1.forward(input);
    output = activation1.forward(output);
    output = layer2.forward(output);
    output = activation2.forward(output);
    return output[0];
}
console.log("\n=== Testing Intermediate Values ===");
console.log(`[0.1, 0.1] -> ${predict([0.1, 0.1]).toFixed(3)} (should be close to 0)`);
console.log(`[0.1, 0.9] -> ${predict([0.1, 0.9]).toFixed(3)} (should be close to 1)`);
console.log(`[0.9, 0.1] -> ${predict([0.9, 0.1]).toFixed(3)} (should be close to 1)`);
console.log(`[0.9, 0.9] -> ${predict([0.9, 0.9]).toFixed(3)} (should be close to 0)`);
console.log("\n=== Debug: Learned Parameters (layer1) ===");
console.log(`layer1.neurons[0].weights[0] = ${layer1.neurons[0].weights[0]};`);
console.log(`layer1.neurons[0].weights[1] = ${layer1.neurons[0].weights[1]};`);
console.log(`layer1.neurons[0].bias = ${layer1.neurons[0].bias};`);
console.log(`layer1.neurons[1].weights[0] = ${layer1.neurons[1].weights[0]};`);
console.log(`layer1.neurons[1].weights[1] = ${layer1.neurons[1].weights[1]};`);
console.log(`layer1.neurons[1].bias = ${layer1.neurons[1].bias};`);
console.log(`layer1.neurons[2].weights[0] = ${layer1.neurons[2].weights[0]};`);
console.log(`layer1.neurons[2].weights[1] = ${layer1.neurons[2].weights[1]};`);
console.log(`layer1.neurons[2].bias = ${layer1.neurons[2].bias};`);
console.log(`layer1.neurons[3].weights[0] = ${layer1.neurons[3].weights[0]};`);
console.log(`layer1.neurons[3].weights[1] = ${layer1.neurons[3].weights[1]};`);
console.log(`layer1.neurons[3].bias = ${layer1.neurons[3].bias};`);
console.log("\n=== Debug: Learned Parameters (layer2) ===");
console.log(`layer2.neurons[0].weights[0] = ${layer2.neurons[0].weights[0]};`);
console.log(`layer2.neurons[0].weights[1] = ${layer2.neurons[0].weights[1]};`);
console.log(`layer2.neurons[0].weights[2] = ${layer2.neurons[0].weights[2]};`);
console.log(`layer2.neurons[0].weights[3] = ${layer2.neurons[0].weights[3]};`);
console.log(`layer2.neurons[0].bias = ${layer2.neurons[0].bias};`);
// Rebuild the network and restore the parameters printed by the debug section
// above, so XOR can be evaluated without retraining.
function xor_ai(left, right) {
    const layer1 = new DenseLayer(2, 4);
    const activation1 = new ActivationLayer("tanh");
    const layer2 = new DenseLayer(4, 1);
    const activation2 = new ActivationLayer("sigmoid");
    layer1.neurons[0].weights[0] = -2.483405330352288;
    layer1.neurons[0].weights[1] = 3.746893395232311;
    layer1.neurons[0].bias = 0.8972583832821088;
    layer1.neurons[1].weights[0] = -3.653234475692758;
    layer1.neurons[1].weights[1] = 0.9955207401046027;
    layer1.neurons[1].bias = 0.5799612103320189;
    layer1.neurons[2].weights[0] = -1.986455463777911;
    layer1.neurons[2].weights[1] = -2.140729883658909;
    layer1.neurons[2].bias = 0.1773771186808191;
    layer1.neurons[3].weights[0] = -3.315691022651759;
    layer1.neurons[3].weights[1] = -3.478943831512278;
    layer1.neurons[3].bias = 1.0466264947557882;
    layer2.neurons[0].weights[0] = -5.891722647970969;
    layer2.neurons[0].weights[1] = 5.841764877092181;
    layer2.neurons[0].weights[2] = -2.167853628782214;
    layer2.neurons[0].weights[3] = -4.926738682524884;
    layer2.neurons[0].bias = -0.9654887789785622;
    // Run network
    let output = layer1.forward([left, right]);
    output = activation1.forward(output);
    output = layer2.forward(output);
    output = activation2.forward(output);
    console.log(`Input: [${left}, ${right}] -> Output: ${output[0].toFixed(3)}`);
    return output[0];
}
console.log("\n=== Testing Saved Values ===");
console.log(`[0, 0] -> ${xor_ai(0, 0).toFixed(3)} (should be close to 0)`);
console.log(`[0, 1] -> ${xor_ai(0, 1).toFixed(3)} (should be close to 1)`);
console.log(`[1, 0] -> ${xor_ai(1, 0).toFixed(3)} (should be close to 1)`);
console.log(`[1, 1] -> ${xor_ai(1, 1).toFixed(3)} (should be close to 0)`);