Part 1 Code Listing
class Loss {
    // Used for regression problems where the goal is to predict continuous values.
    // Penalizes larger errors more heavily due to the squaring, making it sensitive to outliers.
    static meanSquaredError(predictions, targets) {
        let sum = 0;
        for (let i = 0, size = predictions.length; i < size; i++) {
            const diff = predictions[i] - targets[i];
            sum += diff * diff;
        }
        return sum / predictions.length;
    }
    static meanSquaredErrorDerivative(predictions, targets) {
        const derivatives = new Array(predictions.length);
        for (let i = 0; i < predictions.length; i++) {
            derivatives[i] = 2 * (predictions[i] - targets[i]) / predictions.length;
        }
        return derivatives;
    }
    // Also used for regression problems, but less sensitive to outliers than MSE.
    // Treats all errors linearly, giving equal weight to small and large errors.
    static meanAbsoluteError(predictions, targets) {
        let sum = 0;
        for (let i = 0; i < predictions.length; i++) {
            sum += Math.abs(predictions[i] - targets[i]);
        }
        return sum / predictions.length;
    }
    static meanAbsoluteErrorDerivative(predictions, targets) {
        const derivatives = new Array(predictions.length);
        for (let i = 0; i < predictions.length; i++) {
            const diff = predictions[i] - targets[i];
            derivatives[i] = (diff > 0 ? 1 : diff < 0 ? -1 : 0) / predictions.length;
        }
        return derivatives;
    }
    // Used for binary classification problems (e.g., output is 0 or 1).
    // Assumes predictions are probabilities (typically from a sigmoid activation).
    // Measures the divergence between predicted probabilities and true binary labels.
    static binaryCrossEntropy(predictions, targets) {
        let sum = 0;
        for (let i = 0; i < predictions.length; i++) {
            const p = Math.max(1e-7, Math.min(1 - 1e-7, predictions[i]));
            sum += -(targets[i] * Math.log(p) + (1 - targets[i]) * Math.log(1 - p));
        }
        return sum / predictions.length;
    }
    static binaryCrossEntropyDerivative(predictions, targets) {
        const derivatives = new Array(predictions.length);
        for (let i = 0; i < predictions.length; i++) {
            const p = Math.max(1e-7, Math.min(1 - 1e-7, predictions[i]));
            derivatives[i] = (-(targets[i] / p) + (1 - targets[i]) / (1 - p)) / predictions.length;
        }
        return derivatives;
    }
}
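// Illustrative sanity check for the loss functions above (values are exact up to
// floating-point rounding): for predictions [0.5, 0.8] and targets [0, 1],
// MSE = (0.5^2 + 0.2^2) / 2 ≈ 0.145 and the per-element derivative is
// [2 * 0.5 / 2, 2 * (-0.2) / 2] = [0.5, -0.2].
// console.log(Loss.meanSquaredError([0.5, 0.8], [0, 1]));           // ≈ 0.145
// console.log(Loss.meanSquaredErrorDerivative([0.5, 0.8], [0, 1])); // ≈ [0.5, -0.2]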
class Neuron {
    weights = null;
    bias = null;
    constructor(numberOfInputs) {
        this.weights = new Array(numberOfInputs);
        for (let i = 0; i < numberOfInputs; ++i) {
            this.weights[i] = Math.random() * 2 - 1;
        }
        this.bias = Math.random() * 2 - 1;
    }
    forward(inputs) {
        let sum = 0.0;
        for (let i = 0, size = inputs.length; i < size; ++i) {
            sum += this.weights[i] * inputs[i];
        }
        sum += this.bias;
        return sum;
    }
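    // The helpers below apply the chain rule to the weighted sum y = w·x + b
    // computed in forward(), where neuronGradient is dL/dy:
    //   dL/dw[i] = dL/dy * inputs[i]   (parameter gradients)
    //   dL/db    = dL/dy
    //   dL/dx[i] = dL/dy * weights[i]  (input gradients, passed to the previous layer)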
    #calculateParameterGradients(inputs, neuronGradient) {
        const gradients = {
            bias: 0,
            weights: new Array(this.weights.length)
        };
        gradients.bias = neuronGradient;
        for (let i = 0, size = this.weights.length; i < size; i++) {
            gradients.weights[i] = neuronGradient * inputs[i];
        }
        return gradients;
    }
    #updateWeights(gradients, learningRate) {
        for (let i = 0; i < this.weights.length; i++) {
            this.weights[i] -= learningRate * gradients.weights[i];
        }
        this.bias -= learningRate * gradients.bias;
    }
    #calculateInputGradients(neuronGradient) {
        const inputGradients = new Array(this.weights.length);
        for (let i = 0; i < this.weights.length; i++) {
            inputGradients[i] = neuronGradient * this.weights[i];
        }
        return inputGradients;
    }
    backward(inputs, neuronGradient, learningRate) {
        const parameterGradients = this.#calculateParameterGradients(inputs, neuronGradient);
        const inputGradients = this.#calculateInputGradients(neuronGradient);
        this.#updateWeights(parameterGradients, learningRate);
        return inputGradients;
    }
}
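// A fully connected layer: every output neuron receives every input value.
// Inputs are cached on each forward() call so backward() can compute weight
// gradients against the same inputs that produced the outputs.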
class DenseLayer {
    neurons = null;
    cachedInputs = null;
    constructor(numberOfInputs, numberOfOutputs) {
        this.neurons = new Array(numberOfOutputs);
        for (let i = 0; i < numberOfOutputs; ++i) {
            this.neurons[i] = new Neuron(numberOfInputs);
        }
        this.cachedInputs = new Array(numberOfInputs);
    }
    forward(inputs) {
        if (this.cachedInputs == null || this.cachedInputs.length !== inputs.length) {
            this.cachedInputs = new Array(inputs.length);
        }
        for (let i = 0, size = inputs.length; i < size; ++i) {
            this.cachedInputs[i] = inputs[i];
        }
        const outputs = new Array(this.neurons.length);
        for (let i = 0, size = this.neurons.length; i < size; ++i) {
            outputs[i] = this.neurons[i].forward(inputs);
        }
        return outputs;
    }
    backward(outputGradients, learningRate) {
        const inputGradients = new Array(this.cachedInputs.length);
        for (let i = 0; i < inputGradients.length; i++) {
            inputGradients[i] = 0;
        }
        for (let neuronIdx = 0; neuronIdx < this.neurons.length; neuronIdx++) {
            const neuron = this.neurons[neuronIdx];
            const neuronsInputGradients = neuron.backward(
                this.cachedInputs,
                outputGradients[neuronIdx],
                learningRate
            );
            for (let i = 0; i < neuronsInputGradients.length; i++) {
                inputGradients[i] += neuronsInputGradients[i];
            }
        }
        return inputGradients;
    }
}
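// Applies a nonlinearity element-wise. Like DenseLayer, it caches its inputs
// (the pre-activation values) so backward() can evaluate the derivative of the
// chosen activation at those same points during backpropagation.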
class ActivationLayer {
    kind = "relu";
    cachedInputs = null;
    constructor(layerType = "relu") {
        if (layerType === "relu" || layerType === "sigmoid" || layerType === "tanh") {
            this.kind = layerType;
        }
    }
    #reluActivation(x) {
        return Math.max(0, x);
    }
    #sigmoidActivation(x) {
        return 1 / (1 + Math.exp(-x));
    }
    #tanhActivation(x) {
        return Math.tanh(x);
    }
    #reluDerivative(x) {
        return x > 0 ? 1 : 0;
    }
    #sigmoidDerivative(x) {
        const sig = this.#sigmoidActivation(x);
        return sig * (1 - sig);
    }
    #tanhDerivative(x) {
        const t = Math.tanh(x);
        return 1 - t * t;
    }
    forward(inputs) {
        if (this.cachedInputs == null || this.cachedInputs.length !== inputs.length) {
            this.cachedInputs = new Array(inputs.length);
        }
        for (let i = 0, size = inputs.length; i < size; ++i) {
            this.cachedInputs[i] = inputs[i];
        }
        const output = new Array(inputs.length);
        if (this.kind === "relu") {
            for (let i = 0, size = inputs.length; i < size; ++i) {
                output[i] = this.#reluActivation(inputs[i]);
            }
        }
        else if (this.kind === "sigmoid") {
            for (let i = 0, size = inputs.length; i < size; ++i) {
                output[i] = this.#sigmoidActivation(inputs[i]);
            }
        }
        else if (this.kind === "tanh") {
            for (let i = 0, size = inputs.length; i < size; ++i) {
                output[i] = this.#tanhActivation(inputs[i]);
            }
        }
        else {
            return null;
        }
        return output;
    }
    backward(outputGradients) {
        const inputGradients = new Array(outputGradients.length);
        if (this.kind === "relu") {
            for (let i = 0; i < outputGradients.length; i++) {
                inputGradients[i] = outputGradients[i] * this.#reluDerivative(this.cachedInputs[i]);
            }
        }
        else if (this.kind === "sigmoid") {
            for (let i = 0; i < outputGradients.length; i++) {
                inputGradients[i] = outputGradients[i] * this.#sigmoidDerivative(this.cachedInputs[i]);
            }
        }
        else if (this.kind === "tanh") {
            for (let i = 0; i < outputGradients.length; i++) {
                inputGradients[i] = outputGradients[i] * this.#tanhDerivative(this.cachedInputs[i]);
            }
        }
        return inputGradients;
    }
}
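// XOR training setup: a 2-4-1 network (2 inputs, 4 hidden tanh neurons, 1 sigmoid
// output). XOR is not linearly separable, so the hidden layer is required; a
// single neuron could not fit this data. Training uses plain stochastic gradient
// descent, updating the weights after every individual sample.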
const trainingData = [
    { input: [0, 0], target: [0] },
    { input: [0, 1], target: [1] },
    { input: [1, 0], target: [1] },
    { input: [1, 1], target: [0] }
];
const layer1 = new DenseLayer(2, 4);
const activation1 = new ActivationLayer("tanh");
const layer2 = new DenseLayer(4, 1);
const activation2 = new ActivationLayer("sigmoid");
const learningRate = 0.1;
const epochs = 100000;
for (let epoch = 0; epoch < epochs; epoch++) {
    let totalLoss = 0;
    for (const data of trainingData) {
        let output = layer1.forward(data.input);
        output = activation1.forward(output);
        output = layer2.forward(output);
        output = activation2.forward(output);
        const loss = Loss.meanSquaredError(output, data.target);
        totalLoss += loss;
        let gradients = Loss.meanSquaredErrorDerivative(output, data.target);
        gradients = activation2.backward(gradients);
        gradients = layer2.backward(gradients, learningRate);
        gradients = activation1.backward(gradients);
        gradients = layer1.backward(gradients, learningRate);
    }
    if (epoch % 1000 === 0) {
        console.log(`Epoch ${epoch}: Average Loss = ${totalLoss / trainingData.length}`);
    }
}
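// Evaluate the trained network on the four XOR input combinations.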
| console.log("\n=== Testing Trained Network ==="); | |
| for (const data of trainingData) { | |
| let output = layer1.forward(data.input); | |
| output = activation1.forward(output); | |
| output = layer2.forward(output); | |
| output = activation2.forward(output); | |
| console.log(`Input: [${data.input}] -> Output: ${output[0].toFixed(3)} (Target: ${data.target[0]})`); | |
| } | |
| function predict(input) { | |
| let output = layer1.forward(input); | |
| output = activation1.forward(output); | |
| output = layer2.forward(output); | |
| output = activation2.forward(output); | |
| return output[0]; | |
| } | |
| console.log("\n=== Testing Intermediate Values ==="); | |
| console.log(`[0.1, 0.1] -> ${predict([0.1, 0.1]).toFixed(3)} (should be close to 0)`); | |
| console.log(`[0.1, 0.9] -> ${predict([0.1, 0.9]).toFixed(3)} (should be close to 1)`); | |
| console.log(`[0.9, 0.1] -> ${predict([0.9, 0.1]).toFixed(3)} (should be close to 1)`); | |
| console.log(`[0.9, 0.9] -> ${predict([0.9, 0.9]).toFixed(3)} (should be close to 0)`); | |
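// Print the learned weights and biases as assignment statements so the values
// from one training run can be copied into xor_ai() below.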
| console.log("\n=== Debug: Learned Parameters (layer1) ==="); | |
| console.log(`layer1.neurons[0].weights[0] = ${layer1.neurons[0].weights[0]};`); | |
| console.log(`layer1.neurons[0].weights[1] = ${layer1.neurons[0].weights[1]};`); | |
| console.log(`layer1.neurons[0].bias = ${layer1.neurons[0].bias};`); | |
| console.log(`layer1.neurons[1].weights[0] = ${layer1.neurons[1].weights[0]};`); | |
| console.log(`layer1.neurons[1].weights[1] = ${layer1.neurons[1].weights[1]};`); | |
| console.log(`layer1.neurons[1].bias = ${layer1.neurons[1].bias};`); | |
| console.log(`layer1.neurons[2].weights[0] = ${layer1.neurons[2].weights[0]};`); | |
| console.log(`layer1.neurons[2].weights[1] = ${layer1.neurons[2].weights[1]};`); | |
| console.log(`layer1.neurons[2].bias = ${layer1.neurons[2].bias};`); | |
| console.log(`layer1.neurons[3].weights[0] = ${layer1.neurons[3].weights[0]};`); | |
| console.log(`layer1.neurons[3].weights[1] = ${layer1.neurons[3].weights[1]};`); | |
| console.log(`layer1.neurons[3].bias = ${layer1.neurons[3].bias};`); | |
| console.log("\n=== Debug: Learned Parameters (layer2) ==="); | |
| console.log(`layer2.neurons[0].weights[0] = ${layer2.neurons[0].weights[0]};`); | |
| console.log(`layer2.neurons[0].weights[1] = ${layer2.neurons[0].weights[1]};`); | |
| console.log(`layer2.neurons[0].weights[2] = ${layer2.neurons[0].weights[2]};`); | |
| console.log(`layer2.neurons[0].weights[3] = ${layer2.neurons[0].weights[3]};`); | |
| console.log(`layer2.neurons[0].bias = ${layer2.neurons[0].bias};`); | |
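// xor_ai() rebuilds the same 2-4-1 architecture and loads parameters saved from
// a previous training run (the exact values depend on that run's random
// initialization), so it can answer XOR queries without retraining.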
function xor_ai(left, right) {
    const layer1 = new DenseLayer(2, 4);
    const activation1 = new ActivationLayer("tanh");
    const layer2 = new DenseLayer(4, 1);
    const activation2 = new ActivationLayer("sigmoid");
    layer1.neurons[0].weights[0] = -2.483405330352288;
    layer1.neurons[0].weights[1] = 3.746893395232311;
    layer1.neurons[0].bias = 0.8972583832821088;
    layer1.neurons[1].weights[0] = -3.653234475692758;
    layer1.neurons[1].weights[1] = 0.9955207401046027;
    layer1.neurons[1].bias = 0.5799612103320189;
    layer1.neurons[2].weights[0] = -1.986455463777911;
    layer1.neurons[2].weights[1] = -2.140729883658909;
    layer1.neurons[2].bias = 0.1773771186808191;
    layer1.neurons[3].weights[0] = -3.315691022651759;
    layer1.neurons[3].weights[1] = -3.478943831512278;
    layer1.neurons[3].bias = 1.0466264947557882;
    layer2.neurons[0].weights[0] = -5.891722647970969;
    layer2.neurons[0].weights[1] = 5.841764877092181;
    layer2.neurons[0].weights[2] = -2.167853628782214;
    layer2.neurons[0].weights[3] = -4.926738682524884;
    layer2.neurons[0].bias = -0.9654887789785622;
    // Run network
    let output = layer1.forward([left, right]);
    output = activation1.forward(output);
    output = layer2.forward(output);
    output = activation2.forward(output);
    console.log(`Input: [${left}, ${right}] -> Output: ${output[0].toFixed(3)}`);
    return output[0];
}
console.log("\n=== Testing Saved Values ===");
console.log(`[0, 0] -> ${xor_ai(0, 0).toFixed(3)} (should be close to 0)`);
console.log(`[0, 1] -> ${xor_ai(0, 1).toFixed(3)} (should be close to 1)`);
console.log(`[1, 0] -> ${xor_ai(1, 0).toFixed(3)} (should be close to 1)`);
console.log(`[1, 1] -> ${xor_ai(1, 1).toFixed(3)} (should be close to 0)`);