Last active
March 25, 2017 22:00
-
-
Save matthias-k/3ca6a2bc2631246b17b3 to your computer and use it in GitHub Desktop.
draft of L-BFGS in theano
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "%matplotlib inline" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/lib/python3.4/dist-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.\n", | |
| " warnings.warn(self.msg_depr % (key, alt_key))\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "import seaborn as sns\n", | |
| "sns.set_style('white')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import theano\n", | |
| "import theano.tensor as T" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from theano.ifelse import ifelse" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Two loop recursion" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 429, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "### reference implementation in python\n", | |
| "def python_two_loop_recursion(gradient, Y, S, k, m):\n", | |
| " if k == 0:\n", | |
| " return gradient\n", | |
| " \n", | |
| " q = gradient\n", | |
| " start_loop = np.max([k-m, 0])\n", | |
| " loop_indices = range(start_loop, k)\n", | |
| " alphas = []\n", | |
| " \n", | |
| " for i in loop_indices[::-1]:\n", | |
| " s_i = S[i]\n", | |
| " y_i = Y[i]\n", | |
| " \n", | |
| " rho_i = 1/y_i.dot(s_i)\n", | |
| " alpha_i = rho_i * s_i.dot(q)\n", | |
| " alphas.append(alpha_i)\n", | |
| " q = q - alpha_i*y_i\n", | |
| " #print('q', q)\n", | |
| " \n", | |
| " #print('final q', q)\n", | |
| " \n", | |
| " alphas = alphas[::-1]\n", | |
| " #print('alpha', alphas)\n", | |
| " \n", | |
| " s_k1 = S[k-1]\n", | |
| " y_k1 = Y[k-1]\n", | |
| " \n", | |
| " #print('s_{k-1}', s_k1)\n", | |
| " #print('y_{k-1}', y_k1)\n", | |
| " \n", | |
| " gamma_k = s_k1.dot(y_k1) / y_k1.dot(y_k1)\n", | |
| " #print('gamma_k', gamma_k)\n", | |
| " r = gamma_k * q\n", | |
| " #print('r_0', r)\n", | |
| " \n", | |
| " for l, i in enumerate(loop_indices):\n", | |
| " s_i = S[i]\n", | |
| " y_i = Y[i]\n", | |
| " \n", | |
| " rho_i = 1/y_i.dot(s_i)\n", | |
| " alpha_i = alphas[l]\n", | |
| " beta = rho_i * y_i.dot(r)\n", | |
| " \n", | |
| " r = r + s_i*(alpha_i - beta)\n", | |
| " #print('r', r)\n", | |
| " \n", | |
| " return r\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 430, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def two_loop_recursion(gradient, Y, S, k, apply_Hk0):\n", | |
| " \"\"\"\n", | |
| " Build the L-BFGS two loop recursion as described in:\n", | |
| " Nocedal, Wright: Numerical Optimization, Algorithm 7.4\n", | |
| " \"\"\"\n", | |
| " \n", | |
| " not_first_step = T.gt(k,0)\n", | |
| "\n", | |
| " #### First loop\n", | |
| " \n", | |
| " def f(i, q, alpha, Y, S):\n", | |
| " valid_step = T.ge(i, 0)\n", | |
| " #i = theano.printing.Print('i')(i)\n", | |
| " store_index = i % Y.shape[0]\n", | |
| " y = Y[store_index]\n", | |
| " s = S[store_index]\n", | |
| " \n", | |
| " #y = theano.printing.Print('y')(y)\n", | |
| " #s = theano.printing.Print('s')(s)\n", | |
| " \n", | |
| " rho = 1/y.dot(s)\n", | |
| " alpha = rho * s.dot(q)\n", | |
| " new_q = q - alpha*y \n", | |
| " #new_q = theano.printing.Print('q')(new_q)\n", | |
| " return ifelse(valid_step, new_q, q), ifelse(valid_step, alpha, np.array(0.0, dtype=theano.config.floatX))\n", | |
| "\n", | |
| " q3 = gradient\n", | |
| " alpha = T.zeros(())\n", | |
| " \n", | |
| " loop_indices = T.arange(k-Y.shape[0], k)\n", | |
| "\n", | |
| " (results, alpha), updates = theano.scan(f, sequences=[loop_indices],\n", | |
| " outputs_info=[q3, alpha],\n", | |
| " non_sequences=[Y,S],\n", | |
| " strict=True,\n", | |
| " go_backwards=True)\n", | |
| " alpha = alpha[::-1]\n", | |
| " results[-1]\n", | |
| " q3 = results[-1]\n", | |
| " #q3 = theano.printing.Print('final q')(q3)\n", | |
| "\n", | |
| " #### gamma_k = s_{k-1}^Ty_{k-1} / y_{k-1}^Ty_{k-1} (7.20)\n", | |
| " #\n", | |
| " #s = S[(k - 1) % Y.shape[0]]\n", | |
| " #y = Y[(k - 1) % Y.shape[0]]\n", | |
| " #\n", | |
| " #gamma_k = ifelse(T.gt(k,0),s.dot(y)/y.dot(y), np.array(1.0, dtype=theano.config.floatX))\n", | |
| " #\n", | |
| " #### r = H_k^0 q with H_k^0 = gamma_k\n", | |
| " \n", | |
| " #r = gamma_k*q3\n", | |
| " \n", | |
| " r = apply_Hk0(q3, S, Y)\n", | |
| " #r = theano.printing.Print('r_0')(r)\n", | |
| " \n", | |
| " #alpha = theano.printing.Print('alpha')(alpha)\n", | |
| " \n", | |
| " #### Second loop\n", | |
| "\n", | |
| " def f2(i, alpha_i, r, Y, S):\n", | |
| " valid_step = T.ge(i, 0)\n", | |
| " #i = theano.printing.Print('i')(i)\n", | |
| " \n", | |
| " \n", | |
| " store_index = i % Y.shape[0]\n", | |
| " y = Y[store_index]\n", | |
| " s = S[store_index]\n", | |
| " \n", | |
| " #y = theano.printing.Print('y')(y)\n", | |
| " #s = theano.printing.Print('s')(s)\n", | |
| " \n", | |
| " #alpha_i = alpha[i]\n", | |
| "\n", | |
| " rho = 1/y.dot(s)\n", | |
| "\n", | |
| " beta = rho * y.dot(r)\n", | |
| " new_r = r + s*(alpha_i - beta)\n", | |
| " #new_r = theano.printing.Print('r')(new_r)\n", | |
| " new_r = ifelse(valid_step, new_r, r)\n", | |
| " return new_r\n", | |
| "\n", | |
| " results, updates = theano.scan(f2, sequences=[loop_indices, alpha],\n", | |
| " outputs_info=[r],\n", | |
| " non_sequences=[Y,S],\n", | |
| " go_backwards=False,\n", | |
| " strict=True)\n", | |
| " r = results[-1]\n", | |
| " \n", | |
| " return r\n", | |
| " return q3, gamma_k, s, y, s.dot(y), y.dot(y), r" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 446, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def apply_Hk0(q, S, Y):\n", | |
| " \"\"\"\n", | |
| " H_k^0 q := gamma_k q with\n", | |
| " \n", | |
| " gamma_k = s_{k-1}^Ty_{k-1} / y_{k-1}^Ty_{k-1} (7.20)\n", | |
| " \n", | |
| " \"\"\"\n", | |
| " s = S[(k - 1) % Y.shape[0]]\n", | |
| " y = Y[(k - 1) % Y.shape[0]]\n", | |
| " \n", | |
| " #s = theano.printing.Print('s_{k-1}')(s)\n", | |
| " #y = theano.printing.Print('y_{k-1}')(y)\n", | |
| " \n", | |
| "\n", | |
| " gamma_k = ifelse(T.gt(k,0),s.dot(y)/y.dot(y), np.array(1.0, dtype=theano.config.floatX))\n", | |
| " #gamma_k = theano.printing.Print('gamma_k')(gamma_k)\n", | |
| " return gamma_k * q" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "We compare the theano implementation to the python implementation:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 260, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/lib/python3.4/dist-packages/theano/scan_module/scan.py:1019: Warning: In the strict mode, all neccessary shared variables must be passed as a part of non_sequences\n", | |
| " 'must be passed as a part of non_sequences', Warning)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "rr = two_loop_recursion(gradient, Y, S, k, apply_Hk0)\n", | |
| "f_theano = theano.function([], rr)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 261, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def f_python():\n", | |
| " gradient_value = gradient.eval()\n", | |
| " Ys = [Y.eval()[i % m] for i in range(k.eval())]\n", | |
| " Ss = [S.eval()[i % m] for i in range(k.eval())]\n", | |
| " r = python_two_loop_recursion(gradient.eval(), Ys, Ss, k.eval(), m)\n", | |
| " return r" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 217, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "k.set_value(1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 269, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([ 0.02685671, -0.36139641, -0.4481632 ])" | |
| ] | |
| }, | |
| "execution_count": 269, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "f_theano()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 270, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([ 0.02685671, -0.36139641, -0.4481632 ])" | |
| ] | |
| }, | |
| "execution_count": 270, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "f_python()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# The BFGS iteration" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 434, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def python_bfgs_iteration(gradient_func, gradients, xs, Y, S, k, m):\n", | |
| " gradient_k = gradients[k]\n", | |
| " x_k = xs[k]\n", | |
| " p_k = - python_two_loop_recursion(gradient_k, Y, S, k, m)\n", | |
| " #print(p_k)\n", | |
| " x_k1 = xs[k] + p_k\n", | |
| " s_k = x_k1 - x_k\n", | |
| " gradient_k = gradient_func(x_k)\n", | |
| " gradient_k1 = gradient_func(x_k1) \n", | |
| " y_k = gradient_k1 - gradient_k\n", | |
| " \n", | |
| " gradients.append(gradient_k1)\n", | |
| " xs.append(x_k1)\n", | |
| " Y.append(y_k)\n", | |
| " S.append(s_k)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 426, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def gradient_func(xx):\n", | |
| " #print('set')\n", | |
| " x.set_value(xx)\n", | |
| " return gradient.eval()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 132, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "x_0 = np.random.randn(N)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 456, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "xs = [x_0]\n", | |
| "gradients = [gradient_func(x_0)]\n", | |
| "Ys = []\n", | |
| "Ss = []\n", | |
| "kk = 0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 457, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " 0 |x|=2.60976627311797 f(x)=0.5465937982294136\n", | |
| " 1 |x|=0.702598305133158 f(x)=0.1202228141546399\n", | |
| " 2 |x|=0.06887808876605656 f(x)=0.008279923286513797\n", | |
| " 3 |x|=0.009963579722922153 f(x)=0.0008906010431048172\n", | |
| " 4 |x|=0.0012511212735576796 f(x)=8.145131497737265e-05\n", | |
| " 5 |x|=0.00016472052711682608 f(x)=7.881494947743876e-06\n", | |
| " 6 |x|=2.1215414323718854e-05 f(x)=7.449048294397669e-07\n", | |
| " 7 |x|=2.7477632667395476e-06 f(x)=7.09333781822866e-08\n", | |
| " 8 |x|=3.547639407010765e-07 f(x)=6.733865200852477e-09\n", | |
| " 9 |x|=4.583598917870091e-08 f(x)=6.39939485738033e-10\n", | |
| " 10 |x|=5.919433633207291e-09 f(x)=6.079082487230461e-11\n", | |
| " 11 |x|=7.645240393773708e-10 f(x)=5.775645207046635e-12\n", | |
| " 12 |x|=9.873568614933486e-11 f(x)=5.487057176923128e-13\n", | |
| " 13 |x|=1.2751503804021772e-11 f(x)=5.2129920504843006e-14\n", | |
| " 14 |x|=1.6468139161101762e-12 f(x)=4.952579684696765e-15\n", | |
| " 15 |x|=2.1268069694461033e-13 f(x)=4.705188688880147e-16\n", | |
| " 16 |x|=2.7466985454931896e-14 f(x)=4.4701509509822754e-17\n", | |
| " 17 |x|=3.5472675767511894e-15 f(x)=4.24685556324816e-18\n", | |
| " 18 |x|=4.581174184375384e-16 f(x)=4.034713809881591e-19\n", | |
| " 19 |x|=5.916429023998802e-17 f(x)=3.8331692900286005e-20\n", | |
| " 20 |x|=7.64086449420329e-18 f(x)=3.641692381183098e-21\n", | |
| " 21 |x|=9.867913538888078e-19 f(x)=3.4597802714757814e-22\n", | |
| " 22 |x|=1.274407066453028e-19 f(x)=3.286955141967377e-23\n", | |
| " 23 |x|=1.6458528577560204e-20 f(x)=3.1227630852848113e-24\n", | |
| " 24 |x|=2.1255623090194172e-21 f(x)=2.966772852816304e-25\n", | |
| " 25 |x|=2.74509055138786e-22 f(x)=2.8185747432287323e-26\n", | |
| " 26 |x|=3.545189949154044e-23 f(x)=2.6777795190059003e-27\n", | |
| " 27 |x|=4.578490778355825e-24 f(x)=2.544017386713168e-28\n", | |
| " 28 |x|=5.912963228411339e-25 f(x)=2.4169370248461475e-29\n", | |
| " 29 |x|=7.636388459209945e-26 f(x)=2.2962046614124233e-30\n", | |
| " 30 |x|=9.862132816860661e-27 f(x)=2.181503197181356e-31\n", | |
| " 31 |x|=1.2736605034813096e-27 f(x)=2.072531372872752e-32\n", | |
| " 32 |x|=1.644888695226584e-28 f(x)=1.969002977896736e-33\n", | |
| " 33 |x|=2.1243171255516505e-29 f(x)=1.8706460986366015e-34\n", | |
| " 34 |x|=2.743482439272401e-30 f(x)=1.7772024042757962e-35\n", | |
| " 35 |x|=3.5431131275380794e-31 f(x)=1.6884264683018835e-36\n", | |
| " 36 |x|=4.5758086346132866e-32 f(x)=1.6040851239023861e-37\n", | |
| " 37 |x|=5.90949933206063e-33 f(x)=1.5239568515606112e-38\n", | |
| " 38 |x|=7.631914956289756e-34 f(x)=1.4478311972425314e-39\n", | |
| " 39 |x|=9.856355441828773e-35 f(x)=1.3755082196468417e-40\n", | |
| " 40 |x|=1.2729143753836577e-35 f(x)=1.3067979650662867e-41\n", | |
| " 41 |x|=1.6439250964733183e-36 f(x)=1.2415199684810618e-42\n", | |
| " 42 |x|=2.1230726709330098e-37 f(x)=1.1795027795739103e-43\n", | |
| " 43 |x|=2.741875268972016e-38 f(x)=1.1205835124220183e-44\n", | |
| " 44 |x|=3.541037522421003e-39 f(x)=1.0646074176829714e-45\n", | |
| " 45 |x|=4.573128062056071e-40 f(x)=1.0114274761511596e-46\n", | |
| " 46 |x|=5.906037464880122e-41 f(x)=9.609040126171088e-48\n", | |
| " 47 |x|=7.627444073998894e-42 f(x)=9.129043290155451e-49\n", | |
| " 48 |x|=9.850581451257635e-43 f(x)=8.673023558986891e-50\n", | |
| " 49 |x|=1.2721686843780192e-43 f(x)=8.23978321319371e-51\n", | |
| " 50 |x|=1.6429620622095138e-44 f(x)=7.828184362543092e-52\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "x_dist = np.square(xs[-1]).sum()\n", | |
| "print('{:3} |x|={} f(x)={}'.format(int(kk), x_dist, y.eval()))\n", | |
| "\n", | |
| "\n", | |
| "for i in range(50):\n", | |
| " #print('Y', Ys)\n", | |
| " #print('S', Ss)\n", | |
| " python_bfgs_iteration(gradient_func, gradients, xs, Ys, Ss, kk, m)\n", | |
| " x_dist = np.square(xs[-1]).sum()\n", | |
| " #x.set_value(xs[-1])\n", | |
| " kk += 1\n", | |
| " print('{:3} |x|={} f(x)={}'.format(int(kk), x_dist, y.eval()))\n", | |
| " \n", | |
| "\n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 458, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def bfgs_update(gradient, x, Y, S, k):\n", | |
| " \"\"\"Builds the updates for the L-BFGS algorithm as described in:\n", | |
| " Nocedal, Wright: Numerical Optimization, Algorithm 7.5\"\"\"\n", | |
| " not_first_step = T.gt(k,0)\n", | |
| " \n", | |
| " ## Do the update which should have been done at the end of the last loop now where we have\n", | |
| " ## what used to be \\nabla f_{k+1}\n", | |
| " \n", | |
| " s_k = x - last_x\n", | |
| " y_k = gradient - last_gradient\n", | |
| " \n", | |
| " store_index = (k - 1) % Y.shape[0]\n", | |
| " Y_new = ifelse(not_first_step, T.set_subtensor(Y[store_index], y_k), Y)\n", | |
| " S_new = ifelse(not_first_step, T.set_subtensor(S[store_index], s_k), S)\n", | |
| " \n", | |
| " #Y_new = theano.printing.Print('Y')(Y_new)\n", | |
| " #S_new = theano.printing.Print('S')(S_new)\n", | |
| " \n", | |
| " \n", | |
| " p_k = - two_loop_recursion(gradient, Y_new,S_new, k, apply_Hk0)\n", | |
| " \n", | |
| " \n", | |
| " #p_k = theano.printing.Print('p_k')(p_k)\n", | |
| " #kk = theano.printing.Print('k')(k)\n", | |
| " kk = k\n", | |
| " new_k = kk+1\n", | |
| " \n", | |
| " \n", | |
| " x_k1 = x + p_k\n", | |
| " updates = [(x, x_k1),\n", | |
| " (last_gradient, gradient),\n", | |
| " (last_x, x),\n", | |
| " (Y, Y_new),\n", | |
| " (S, S_new),\n", | |
| " (k, new_k)]\n", | |
| " return updates\n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 459, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "N = 3\n", | |
| "m = 20" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 460, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "k = theano.shared(0)\n", | |
| "last_x = theano.shared(np.random.randn(N))\n", | |
| "last_gradient = theano.shared(np.random.randn(N))\n", | |
| "S = theano.shared(np.random.standard_normal((m, N)), name='S')\n", | |
| "Y = theano.shared(np.random.standard_normal((m, N)), name='Y')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 461, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "x = theano.shared(x_0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 462, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "y = 0.2*(T.abs_(x)**2.3).sum()\n", | |
| "gradient = T.grad(y, x)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 463, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/lib/python3.4/dist-packages/theano/scan_module/scan.py:1019: Warning: In the strict mode, all neccessary shared variables must be passed as a part of non_sequences\n", | |
| " 'must be passed as a part of non_sequences', Warning)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "updates = bfgs_update(gradient, x, Y, S, k)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 464, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "f_bfgs = theano.function([], y, updates=updates)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 465, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " 0 |x|=2.60976627311797 f(x)=0.5465937982294136\n", | |
| " 1 |x|=0.702598305133158 f(x)=0.1202228141546399\n", | |
| " 2 |x|=0.06887808876605656 f(x)=0.008279923286513797\n", | |
| " 3 |x|=0.009963579722922153 f(x)=0.0008906010431048172\n", | |
| " 4 |x|=0.0012511212735576791 f(x)=8.145131497737262e-05\n", | |
| " 5 |x|=0.000164720527116826 f(x)=7.881494947743873e-06\n", | |
| " 6 |x|=2.121541432371882e-05 f(x)=7.449048294397654e-07\n", | |
| " 7 |x|=2.7477632667395425e-06 f(x)=7.093337818228644e-08\n", | |
| " 8 |x|=3.5476394070107576e-07 f(x)=6.733865200852461e-09\n", | |
| " 9 |x|=4.5835989178700805e-08 f(x)=6.399394857380312e-10\n", | |
| " 10 |x|=5.919433633207281e-09 f(x)=6.079082487230449e-11\n", | |
| " 11 |x|=7.645240393773698e-10 f(x)=5.775645207046627e-12\n", | |
| " 12 |x|=9.873568614933473e-11 f(x)=5.487057176923119e-13\n", | |
| " 13 |x|=1.2751503804021746e-11 f(x)=5.2129920504842886e-14\n", | |
| " 14 |x|=1.646813916110173e-12 f(x)=4.952579684696753e-15\n", | |
| " 15 |x|=2.126806969446094e-13 f(x)=4.705188688880122e-16\n", | |
| " 16 |x|=2.746698545493183e-14 f(x)=4.4701509509822625e-17\n", | |
| " 17 |x|=3.5472675767511807e-15 f(x)=4.2468555632481475e-18\n", | |
| " 18 |x|=4.581174184375375e-16 f(x)=4.034713809881582e-19\n", | |
| " 19 |x|=5.916429023998782e-17 f(x)=3.8331692900285855e-20\n", | |
| " 20 |x|=7.640864494203281e-18 f(x)=3.6416923811830926e-21\n", | |
| " 21 |x|=9.86791353888805e-19 f(x)=3.45978027147577e-22\n", | |
| " 22 |x|=1.2744070664530268e-19 f(x)=3.2869551419673724e-23\n", | |
| " 23 |x|=1.6458528577560162e-20 f(x)=3.122763085284802e-24\n", | |
| " 24 |x|=2.1255623090194112e-21 f(x)=2.9667728528162933e-25\n", | |
| " 25 |x|=2.7450905513878517e-22 f(x)=2.818574743228722e-26\n", | |
| " 26 |x|=3.545189949154034e-23 f(x)=2.6777795190058913e-27\n", | |
| " 27 |x|=4.578490778355809e-24 f(x)=2.544017386713158e-28\n", | |
| " 28 |x|=5.912963228411321e-25 f(x)=2.416937024846139e-29\n", | |
| " 29 |x|=7.636388459209922e-26 f(x)=2.2962046614124146e-30\n", | |
| " 30 |x|=9.862132816860626e-27 f(x)=2.181503197181347e-31\n", | |
| " 31 |x|=1.2736605034813046e-27 f(x)=2.072531372872742e-32\n", | |
| " 32 |x|=1.6448886952265766e-28 f(x)=1.9690029778967255e-33\n", | |
| " 33 |x|=2.1243171255516384e-29 f(x)=1.8706460986365884e-34\n", | |
| " 34 |x|=2.743482439272389e-30 f(x)=1.7772024042757868e-35\n", | |
| " 35 |x|=3.5431131275380575e-31 f(x)=1.6884264683018715e-36\n", | |
| " 36 |x|=4.5758086346132647e-32 f(x)=1.6040851239023767e-37\n", | |
| " 37 |x|=5.9094993320605904e-33 f(x)=1.5239568515605992e-38\n", | |
| " 38 |x|=7.631914956289707e-34 f(x)=1.4478311972425201e-39\n", | |
| " 39 |x|=9.856355441828715e-35 f(x)=1.3755082196468319e-40\n", | |
| " 40 |x|=1.2729143753836495e-35 f(x)=1.3067979650662765e-41\n", | |
| " 41 |x|=1.643925096473307e-36 f(x)=1.2415199684810516e-42\n", | |
| " 42 |x|=2.1230726709329943e-37 f(x)=1.1795027795739002e-43\n", | |
| " 43 |x|=2.741875268971997e-38 f(x)=1.1205835124220091e-44\n", | |
| " 44 |x|=3.541037522420973e-39 f(x)=1.0646074176829608e-45\n", | |
| " 45 |x|=4.573128062056035e-40 f(x)=1.0114274761511503e-46\n", | |
| " 46 |x|=5.906037464880071e-41 f(x)=9.609040126170988e-48\n", | |
| " 47 |x|=7.627444073998833e-42 f(x)=9.129043290155365e-49\n", | |
| " 48 |x|=9.85058145125755e-43 f(x)=8.6730235589868e-50\n", | |
| " 49 |x|=1.2721686843780113e-43 f(x)=8.239783213193649e-51\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#x_dist = np.square(x.get_value()).sum()\n", | |
| "#print('{:3} |x|={} f(x)={}'.format(int(k.get_value()), x_dist, y.eval()))\n", | |
| "for i in range(50):\n", | |
| " x_dist = np.square(x.get_value()).sum()\n", | |
| " value = f_bfgs()\n", | |
| " print('{:3} |x|={} f(x)={}'.format(int(k.get_value()-1), x_dist, value))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.4.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment