matthias-k/L-BFGS.ipynb

## L-BFGS.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.4/dist-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.\n",
      "  warnings.warn(self.msg_depr % (key, alt_key))\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "sns.set_style('white')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import theano\n",
    "import theano.tensor as T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from theano.ifelse import ifelse"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Two loop recursion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 429,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "### reference implementation in python\n",
    "def python_two_loop_recursion(gradient, Y, S, k, m):\n",
    "    \"\"\"\n",
    "    Plain Python/NumPy version of the L-BFGS two loop recursion.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    gradient : array\n",
    "        Gradient at iteration ``k``.\n",
    "    Y, S : sequences indexed by absolute iteration number\n",
    "        ``Y[i]`` and ``S[i]`` hold the gradient difference and the step of iteration ``i``.\n",
    "    k : int\n",
    "        Current iteration number. For ``k == 0`` the gradient is returned unchanged.\n",
    "    m : int\n",
    "        Maximum number of most recent (s, y) pairs that are used.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    array\n",
    "        The result ``r`` of the recursion (the caller negates it to obtain the step).\n",
    "    \"\"\"\n",
    "    if k == 0:\n",
    "        # No (s, y) pairs exist yet.\n",
    "        return gradient\n",
    "\n",
    "    # Iterations whose pairs take part in the recursion: the last m before k.\n",
    "    window = list(range(max(k - m, 0), k))\n",
    "\n",
    "    # First loop: walk from the newest pair back to the oldest one.\n",
    "    q = gradient\n",
    "    rhos = {}\n",
    "    alphas = {}\n",
    "    for idx in reversed(window):\n",
    "        step, grad_diff = S[idx], Y[idx]\n",
    "        rhos[idx] = 1/grad_diff.dot(step)\n",
    "        alphas[idx] = rhos[idx] * step.dot(q)\n",
    "        q = q - alphas[idx]*grad_diff\n",
    "\n",
    "    # Initial scaling gamma_k, computed from the most recent pair.\n",
    "    newest_s, newest_y = S[k-1], Y[k-1]\n",
    "    gamma_k = newest_s.dot(newest_y) / newest_y.dot(newest_y)\n",
    "    r = gamma_k * q\n",
    "\n",
    "    # Second loop: walk from the oldest pair forward to the newest one.\n",
    "    for idx in window:\n",
    "        step, grad_diff = S[idx], Y[idx]\n",
    "        beta = rhos[idx] * grad_diff.dot(r)\n",
    "        r = r + step*(alphas[idx] - beta)\n",
    "\n",
    "    return r\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 430,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def two_loop_recursion(gradient, Y, S, k, apply_Hk0):\n",
    "    \"\"\"\n",
    "    Build the L-BFGS two loop recursion as described in:\n",
    "    Nocedal, Wright: Numerical Optimization, Algorithm 7.4\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    gradient : symbolic vector\n",
    "        Gradient the recursion is applied to.\n",
    "    Y, S : symbolic matrices\n",
    "        Ring buffers of (y, s) pairs; the pair of iteration ``i`` is read from\n",
    "        row ``i % Y.shape[0]``.\n",
    "    k : symbolic integer scalar\n",
    "        Current iteration number. Iterations ``k - Y.shape[0]`` to ``k - 1`` are\n",
    "        visited; those with a negative number are skipped.\n",
    "    apply_Hk0 : callable\n",
    "        Called as ``apply_Hk0(q, S, Y)`` between the two loops; its return value\n",
    "        is the starting point ``r`` of the second loop.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    symbolic vector\n",
    "        The result ``r`` of the second loop.\n",
    "    \"\"\"\n",
    "\n",
    "    # Fixed-length window of iteration numbers. Entries below zero belong to\n",
    "    # iterations that never happened; the scan steps turn them into no-ops.\n",
    "    loop_indices = T.arange(k-Y.shape[0], k)\n",
    "\n",
    "    #### First loop (newest pair to oldest pair)\n",
    "\n",
    "    def first_loop_step(i, q, _previous_alpha, Y, S):\n",
    "        # _previous_alpha is only passed because alpha is a recurrent scan output;\n",
    "        # the step does not use it.\n",
    "        valid_step = T.ge(i, 0)\n",
    "        store_index = i % Y.shape[0]\n",
    "        y = Y[store_index]\n",
    "        s = S[store_index]\n",
    "\n",
    "        rho = 1/y.dot(s)\n",
    "        alpha_i = rho * s.dot(q)\n",
    "        new_q = q - alpha_i*y\n",
    "        return (ifelse(valid_step, new_q, q),\n",
    "                ifelse(valid_step, alpha_i, np.array(0.0, dtype=theano.config.floatX)))\n",
    "\n",
    "    (qs, alphas), _ = theano.scan(first_loop_step, sequences=[loop_indices],\n",
    "                                  outputs_info=[gradient, T.zeros(())],\n",
    "                                  non_sequences=[Y,S],\n",
    "                                  strict=True,\n",
    "                                  go_backwards=True)\n",
    "    # The scan ran backwards over loop_indices; flip the alphas so that\n",
    "    # alphas[j] belongs to loop_indices[j] again.\n",
    "    alphas = alphas[::-1]\n",
    "    q = qs[-1]\n",
    "\n",
    "    #### r = H_k^0 q\n",
    "\n",
    "    r = apply_Hk0(q, S, Y)\n",
    "\n",
    "    #### Second loop (oldest pair to newest pair)\n",
    "\n",
    "    def second_loop_step(i, alpha_i, r, Y, S):\n",
    "        valid_step = T.ge(i, 0)\n",
    "        store_index = i % Y.shape[0]\n",
    "        y = Y[store_index]\n",
    "        s = S[store_index]\n",
    "\n",
    "        rho = 1/y.dot(s)\n",
    "        beta = rho * y.dot(r)\n",
    "        new_r = r + s*(alpha_i - beta)\n",
    "        return ifelse(valid_step, new_r, r)\n",
    "\n",
    "    rs, _ = theano.scan(second_loop_step, sequences=[loop_indices, alphas],\n",
    "                        outputs_info=[r],\n",
    "                        non_sequences=[Y,S],\n",
    "                        go_backwards=False,\n",
    "                        strict=True)\n",
    "\n",
    "    return rs[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 446,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def apply_Hk0(q, S, Y):\n",
    "    \"\"\"\n",
    "    Apply the initial inverse Hessian approximation to q:\n",
    "\n",
    "    H_k^0 q := gamma_k q with\n",
    "\n",
    "    gamma_k = s_{k-1}^T y_{k-1} / y_{k-1}^T y_{k-1}  (7.20)\n",
    "\n",
    "    The pair (s_{k-1}, y_{k-1}) is read from row (k - 1) % Y.shape[0] of S and Y.\n",
    "    While k is 0, gamma_k is 1.\n",
    "\n",
    "    Note: k is not a parameter. It is looked up in the notebook namespace when\n",
    "    this function is called.\n",
    "    \"\"\"\n",
    "    newest = (k - 1) % Y.shape[0]\n",
    "    s_prev = S[newest]\n",
    "    y_prev = Y[newest]\n",
    "\n",
    "    scaling = s_prev.dot(y_prev)/y_prev.dot(y_prev)\n",
    "    no_scaling = np.array(1.0, dtype=theano.config.floatX)\n",
    "    gamma_k = ifelse(T.gt(k,0), scaling, no_scaling)\n",
    "    return gamma_k * q"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
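    "We compare the Theano implementation to the Python reference implementation. The cells below use `gradient`, `Y`, `S`, `k` and `m`, which are defined further down in this notebook, so those setup cells have to be run first."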
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 260,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.4/dist-packages/theano/scan_module/scan.py:1019: Warning: In the strict mode, all neccessary shared variables must be passed as a part of non_sequences\n",
      "  'must be passed as a part of non_sequences', Warning)\n"
     ]
    }
   ],
   "source": [
    "rr = two_loop_recursion(gradient, Y, S, k, apply_Hk0)\n",
    "f_theano = theano.function([], rr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 261,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def f_python():\n",
    "    \"\"\"\n",
    "    Run the Python reference recursion on the current values of the notebook's\n",
    "    shared variables (gradient, Y, S, k) with history size m.\n",
    "\n",
    "    The ring buffers Y and S are unrolled into lists indexed by absolute\n",
    "    iteration number, as python_two_loop_recursion expects: entry i is taken\n",
    "    from row i % m.\n",
    "    \"\"\"\n",
    "    # Evaluate every symbolic value once instead of once per list entry.\n",
    "    gradient_value = gradient.eval()\n",
    "    k_value = int(k.eval())\n",
    "    Y_value = Y.eval()\n",
    "    S_value = S.eval()\n",
    "\n",
    "    Ys = [Y_value[i % m] for i in range(k_value)]\n",
    "    Ss = [S_value[i % m] for i in range(k_value)]\n",
    "    return python_two_loop_recursion(gradient_value, Ys, Ss, k_value, m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "k.set_value(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 269,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.02685671, -0.36139641, -0.4481632 ])"
      ]
     },
     "execution_count": 269,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f_theano()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 270,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.02685671, -0.36139641, -0.4481632 ])"
      ]
     },
     "execution_count": 270,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f_python()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The BFGS iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 434,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def python_bfgs_iteration(gradient_func, gradients, xs, Y, S, k, m):\n",
    "    \"\"\"\n",
    "    Perform iteration k of the Python reference L-BFGS (step length 1).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    gradient_func : callable\n",
    "        Maps a point to the gradient at that point.\n",
    "    gradients, xs : lists\n",
    "        Gradients and iterates so far; entry k must exist.\n",
    "    Y, S : lists\n",
    "        Gradient differences and steps of the previous iterations.\n",
    "    k : int\n",
    "        Number of the iteration to perform.\n",
    "    m : int\n",
    "        History size passed on to python_two_loop_recursion.\n",
    "\n",
    "    Returns nothing; the new gradient, iterate, y_k and s_k are appended to\n",
    "    gradients, xs, Y and S in place.\n",
    "    \"\"\"\n",
    "    x_k = xs[k]\n",
    "    # The gradient at x_k is already stored, so it is not evaluated a second time.\n",
    "    gradient_k = gradients[k]\n",
    "\n",
    "    p_k = - python_two_loop_recursion(gradient_k, Y, S, k, m)\n",
    "    x_k1 = x_k + p_k\n",
    "    gradient_k1 = gradient_func(x_k1)\n",
    "\n",
    "    s_k = x_k1 - x_k\n",
    "    y_k = gradient_k1 - gradient_k\n",
    "\n",
    "    gradients.append(gradient_k1)\n",
    "    xs.append(x_k1)\n",
    "    Y.append(y_k)\n",
    "    S.append(s_k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 426,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def gradient_func(xx):\n",
    "    \"\"\"\n",
    "    Return the value of the notebook-level expression `gradient` at `xx`.\n",
    "\n",
    "    Side effect: the notebook-level shared variable `x` is overwritten with `xx`\n",
    "    and keeps that value after the call.\n",
    "    \"\"\"\n",
    "    #print('set')\n",
    "    x.set_value(xx)\n",
    "    return gradient.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x_0 = np.random.randn(N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 456,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "xs = [x_0]\n",
    "gradients = [gradient_func(x_0)]\n",
    "Ys = []\n",
    "Ss = []\n",
    "kk = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 457,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0 |x|=2.60976627311797 f(x)=0.5465937982294136\n",
      "  1 |x|=0.702598305133158 f(x)=0.1202228141546399\n",
      "  2 |x|=0.06887808876605656 f(x)=0.008279923286513797\n",
      "  3 |x|=0.009963579722922153 f(x)=0.0008906010431048172\n",
      "  4 |x|=0.0012511212735576796 f(x)=8.145131497737265e-05\n",
      "  5 |x|=0.00016472052711682608 f(x)=7.881494947743876e-06\n",
      "  6 |x|=2.1215414323718854e-05 f(x)=7.449048294397669e-07\n",
      "  7 |x|=2.7477632667395476e-06 f(x)=7.09333781822866e-08\n",
      "  8 |x|=3.547639407010765e-07 f(x)=6.733865200852477e-09\n",
      "  9 |x|=4.583598917870091e-08 f(x)=6.39939485738033e-10\n",
      " 10 |x|=5.919433633207291e-09 f(x)=6.079082487230461e-11\n",
      " 11 |x|=7.645240393773708e-10 f(x)=5.775645207046635e-12\n",
      " 12 |x|=9.873568614933486e-11 f(x)=5.487057176923128e-13\n",
      " 13 |x|=1.2751503804021772e-11 f(x)=5.2129920504843006e-14\n",
      " 14 |x|=1.6468139161101762e-12 f(x)=4.952579684696765e-15\n",
      " 15 |x|=2.1268069694461033e-13 f(x)=4.705188688880147e-16\n",
      " 16 |x|=2.7466985454931896e-14 f(x)=4.4701509509822754e-17\n",
      " 17 |x|=3.5472675767511894e-15 f(x)=4.24685556324816e-18\n",
      " 18 |x|=4.581174184375384e-16 f(x)=4.034713809881591e-19\n",
      " 19 |x|=5.916429023998802e-17 f(x)=3.8331692900286005e-20\n",
      " 20 |x|=7.64086449420329e-18 f(x)=3.641692381183098e-21\n",
      " 21 |x|=9.867913538888078e-19 f(x)=3.4597802714757814e-22\n",
      " 22 |x|=1.274407066453028e-19 f(x)=3.286955141967377e-23\n",
      " 23 |x|=1.6458528577560204e-20 f(x)=3.1227630852848113e-24\n",
      " 24 |x|=2.1255623090194172e-21 f(x)=2.966772852816304e-25\n",
      " 25 |x|=2.74509055138786e-22 f(x)=2.8185747432287323e-26\n",
      " 26 |x|=3.545189949154044e-23 f(x)=2.6777795190059003e-27\n",
      " 27 |x|=4.578490778355825e-24 f(x)=2.544017386713168e-28\n",
      " 28 |x|=5.912963228411339e-25 f(x)=2.4169370248461475e-29\n",
      " 29 |x|=7.636388459209945e-26 f(x)=2.2962046614124233e-30\n",
      " 30 |x|=9.862132816860661e-27 f(x)=2.181503197181356e-31\n",
      " 31 |x|=1.2736605034813096e-27 f(x)=2.072531372872752e-32\n",
      " 32 |x|=1.644888695226584e-28 f(x)=1.969002977896736e-33\n",
      " 33 |x|=2.1243171255516505e-29 f(x)=1.8706460986366015e-34\n",
      " 34 |x|=2.743482439272401e-30 f(x)=1.7772024042757962e-35\n",
      " 35 |x|=3.5431131275380794e-31 f(x)=1.6884264683018835e-36\n",
      " 36 |x|=4.5758086346132866e-32 f(x)=1.6040851239023861e-37\n",
      " 37 |x|=5.90949933206063e-33 f(x)=1.5239568515606112e-38\n",
      " 38 |x|=7.631914956289756e-34 f(x)=1.4478311972425314e-39\n",
      " 39 |x|=9.856355441828773e-35 f(x)=1.3755082196468417e-40\n",
      " 40 |x|=1.2729143753836577e-35 f(x)=1.3067979650662867e-41\n",
      " 41 |x|=1.6439250964733183e-36 f(x)=1.2415199684810618e-42\n",
      " 42 |x|=2.1230726709330098e-37 f(x)=1.1795027795739103e-43\n",
      " 43 |x|=2.741875268972016e-38 f(x)=1.1205835124220183e-44\n",
      " 44 |x|=3.541037522421003e-39 f(x)=1.0646074176829714e-45\n",
      " 45 |x|=4.573128062056071e-40 f(x)=1.0114274761511596e-46\n",
      " 46 |x|=5.906037464880122e-41 f(x)=9.609040126171088e-48\n",
      " 47 |x|=7.627444073998894e-42 f(x)=9.129043290155451e-49\n",
      " 48 |x|=9.850581451257635e-43 f(x)=8.673023558986891e-50\n",
      " 49 |x|=1.2721686843780192e-43 f(x)=8.23978321319371e-51\n",
      " 50 |x|=1.6429620622095138e-44 f(x)=7.828184362543092e-52\n"
     ]
    }
   ],
   "source": [
    "# x_dist is the sum of the squared entries of the newest iterate; it is printed\n",
    "# under the label |x|.\n",
    "x_dist = np.square(xs[-1]).sum()\n",
    "# y.eval() evaluates the objective at the current value of the shared variable x.\n",
    "print('{:3} |x|={} f(x)={}'.format(int(kk), x_dist, y.eval()))\n",
    "\n",
    "\n",
    "for i in range(50):\n",
    "    #print('Y', Ys)\n",
    "    #print('S', Ss)\n",
    "    # Appends the new iterate, gradient, y_k and s_k to xs, gradients, Ys and Ss.\n",
    "    # Its last gradient_func call leaves the shared variable x at the new iterate,\n",
    "    # which is what y.eval() below relies on.\n",
    "    python_bfgs_iteration(gradient_func, gradients, xs, Ys, Ss, kk, m)\n",
    "    x_dist = np.square(xs[-1]).sum()\n",
    "    #x.set_value(xs[-1])\n",
    "    kk += 1\n",
    "    print('{:3} |x|={} f(x)={}'.format(int(kk), x_dist, y.eval()))\n",
    "    \n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 458,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def bfgs_update(gradient, x, Y, S, k):\n",
    "    \"\"\"\n",
    "    Build the shared variable updates for one step of the L-BFGS algorithm as described in:\n",
    "    Nocedal, Wright: Numerical Optimization, Algorithm 7.5\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    gradient : symbolic vector\n",
    "        Gradient at the current value of x.\n",
    "    x : shared variable\n",
    "        Current iterate.\n",
    "    Y, S : shared variables\n",
    "        Ring buffers of (y, s) pairs; the pair of step k - 1 goes into row\n",
    "        (k - 1) % Y.shape[0].\n",
    "    k : shared variable\n",
    "        Iteration counter.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of (shared variable, new value) pairs for x, last_gradient, last_x, Y, S and k.\n",
    "\n",
    "    Note: last_x, last_gradient and apply_Hk0 are not parameters; they are taken\n",
    "    from the notebook namespace.\n",
    "    \"\"\"\n",
    "    has_previous_step = T.gt(k,0)\n",
    "    slot = (k - 1) % Y.shape[0]\n",
    "\n",
    "    # The pair of the previous step is stored only now, because the gradient at\n",
    "    # the point reached by that step is available only now. On the very first\n",
    "    # step there is no previous step, so Y and S are left untouched.\n",
    "    step = x - last_x\n",
    "    gradient_change = gradient - last_gradient\n",
    "    Y_new = ifelse(has_previous_step, T.set_subtensor(Y[slot], gradient_change), Y)\n",
    "    S_new = ifelse(has_previous_step, T.set_subtensor(S[slot], step), S)\n",
    "\n",
    "    # Search direction computed from the refreshed history; it is applied with step length 1.\n",
    "    direction = - two_loop_recursion(gradient, Y_new,S_new, k, apply_Hk0)\n",
    "\n",
    "    return [(x, x + direction),\n",
    "            (last_gradient, gradient),\n",
    "            (last_x, x),\n",
    "            (Y, Y_new),\n",
    "            (S, S_new),\n",
    "            (k, k+1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 459,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# N: number of entries of x_0, last_x and last_gradient, and number of columns of S and Y.\n",
    "# m: number of rows of the history buffers S and Y.\n",
    "N = 3\n",
    "m = 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 460,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Iteration counter; the updates built by bfgs_update add 1 to it on every step.\n",
    "k = theano.shared(0)\n",
    "# Iterate and gradient of the previous step. The random start values are only\n",
    "# placeholders: bfgs_update stores differences built from them in Y/S only when k > 0.\n",
    "last_x = theano.shared(np.random.randn(N))\n",
    "last_gradient = theano.shared(np.random.randn(N))\n",
    "# History buffers of shape (m, N), filled with random placeholder rows.\n",
    "# two_loop_recursion skips iteration numbers below zero.\n",
    "S = theano.shared(np.random.standard_normal((m, N)), name='S')\n",
    "Y = theano.shared(np.random.standard_normal((m, N)), name='Y')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 461,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "x = theano.shared(x_0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 462,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Objective f(x) = 0.2 * sum_i |x_i|^2.3 and its symbolic gradient with respect to x.\n",
    "y = 0.2*(T.abs_(x)**2.3).sum()\n",
    "gradient = T.grad(y, x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 463,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.4/dist-packages/theano/scan_module/scan.py:1019: Warning: In the strict mode, all neccessary shared variables must be passed as a part of non_sequences\n",
      "  'must be passed as a part of non_sequences', Warning)\n"
     ]
    }
   ],
   "source": [
    "updates = bfgs_update(gradient, x, Y, S, k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 464,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "f_bfgs = theano.function([], y, updates=updates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 465,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0 |x|=2.60976627311797 f(x)=0.5465937982294136\n",
      "  1 |x|=0.702598305133158 f(x)=0.1202228141546399\n",
      "  2 |x|=0.06887808876605656 f(x)=0.008279923286513797\n",
      "  3 |x|=0.009963579722922153 f(x)=0.0008906010431048172\n",
      "  4 |x|=0.0012511212735576791 f(x)=8.145131497737262e-05\n",
      "  5 |x|=0.000164720527116826 f(x)=7.881494947743873e-06\n",
      "  6 |x|=2.121541432371882e-05 f(x)=7.449048294397654e-07\n",
      "  7 |x|=2.7477632667395425e-06 f(x)=7.093337818228644e-08\n",
      "  8 |x|=3.5476394070107576e-07 f(x)=6.733865200852461e-09\n",
      "  9 |x|=4.5835989178700805e-08 f(x)=6.399394857380312e-10\n",
      " 10 |x|=5.919433633207281e-09 f(x)=6.079082487230449e-11\n",
      " 11 |x|=7.645240393773698e-10 f(x)=5.775645207046627e-12\n",
      " 12 |x|=9.873568614933473e-11 f(x)=5.487057176923119e-13\n",
      " 13 |x|=1.2751503804021746e-11 f(x)=5.2129920504842886e-14\n",
      " 14 |x|=1.646813916110173e-12 f(x)=4.952579684696753e-15\n",
      " 15 |x|=2.126806969446094e-13 f(x)=4.705188688880122e-16\n",
      " 16 |x|=2.746698545493183e-14 f(x)=4.4701509509822625e-17\n",
      " 17 |x|=3.5472675767511807e-15 f(x)=4.2468555632481475e-18\n",
      " 18 |x|=4.581174184375375e-16 f(x)=4.034713809881582e-19\n",
      " 19 |x|=5.916429023998782e-17 f(x)=3.8331692900285855e-20\n",
      " 20 |x|=7.640864494203281e-18 f(x)=3.6416923811830926e-21\n",
      " 21 |x|=9.86791353888805e-19 f(x)=3.45978027147577e-22\n",
      " 22 |x|=1.2744070664530268e-19 f(x)=3.2869551419673724e-23\n",
      " 23 |x|=1.6458528577560162e-20 f(x)=3.122763085284802e-24\n",
      " 24 |x|=2.1255623090194112e-21 f(x)=2.9667728528162933e-25\n",
      " 25 |x|=2.7450905513878517e-22 f(x)=2.818574743228722e-26\n",
      " 26 |x|=3.545189949154034e-23 f(x)=2.6777795190058913e-27\n",
      " 27 |x|=4.578490778355809e-24 f(x)=2.544017386713158e-28\n",
      " 28 |x|=5.912963228411321e-25 f(x)=2.416937024846139e-29\n",
      " 29 |x|=7.636388459209922e-26 f(x)=2.2962046614124146e-30\n",
      " 30 |x|=9.862132816860626e-27 f(x)=2.181503197181347e-31\n",
      " 31 |x|=1.2736605034813046e-27 f(x)=2.072531372872742e-32\n",
      " 32 |x|=1.6448886952265766e-28 f(x)=1.9690029778967255e-33\n",
      " 33 |x|=2.1243171255516384e-29 f(x)=1.8706460986365884e-34\n",
      " 34 |x|=2.743482439272389e-30 f(x)=1.7772024042757868e-35\n",
      " 35 |x|=3.5431131275380575e-31 f(x)=1.6884264683018715e-36\n",
      " 36 |x|=4.5758086346132647e-32 f(x)=1.6040851239023767e-37\n",
      " 37 |x|=5.9094993320605904e-33 f(x)=1.5239568515605992e-38\n",
      " 38 |x|=7.631914956289707e-34 f(x)=1.4478311972425201e-39\n",
      " 39 |x|=9.856355441828715e-35 f(x)=1.3755082196468319e-40\n",
      " 40 |x|=1.2729143753836495e-35 f(x)=1.3067979650662765e-41\n",
      " 41 |x|=1.643925096473307e-36 f(x)=1.2415199684810516e-42\n",
      " 42 |x|=2.1230726709329943e-37 f(x)=1.1795027795739002e-43\n",
      " 43 |x|=2.741875268971997e-38 f(x)=1.1205835124220091e-44\n",
      " 44 |x|=3.541037522420973e-39 f(x)=1.0646074176829608e-45\n",
      " 45 |x|=4.573128062056035e-40 f(x)=1.0114274761511503e-46\n",
      " 46 |x|=5.906037464880071e-41 f(x)=9.609040126170988e-48\n",
      " 47 |x|=7.627444073998833e-42 f(x)=9.129043290155365e-49\n",
      " 48 |x|=9.85058145125755e-43 f(x)=8.6730235589868e-50\n",
      " 49 |x|=1.2721686843780113e-43 f(x)=8.239783213193649e-51\n"
     ]
    }
   ],
   "source": [
    "#x_dist = np.square(x.get_value()).sum()\n",
    "#print('{:3} |x|={} f(x)={}'.format(int(k.get_value()), x_dist, y.eval()))\n",
    "for i in range(50):\n",
    "    # Sum of the squared entries of x before this step (printed under the label |x|).\n",
    "    x_dist = np.square(x.get_value()).sum()\n",
    "    # One L-BFGS step: returns y and applies the updates built by bfgs_update.\n",
    "    value = f_bfgs()\n",
    "    # The call above has already incremented k, hence the -1 to label the row with\n",
    "    # the number of the step that was just taken.\n",
    "    print('{:3} |x|={} f(x)={}'.format(int(k.get_value()-1), x_dist, value))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"%matplotlib inline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/usr/local/lib/python3.4/dist-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.\n",
	" warnings.warn(self.msg_depr % (key, alt_key))\n"
	]
	}
	],
	"source": [
	"import numpy as np\n",
	"import matplotlib.pyplot as plt\n",
	"import seaborn as sns\n",
	"sns.set_style('white')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import theano\n",
	"import theano.tensor as T"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from theano.ifelse import ifelse"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Two loop recursion"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 429,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"### reference implementation in python\n",
	"def python_two_loop_recursion(gradient, Y, S, k, m):\n",
	" if k == 0:\n",
	" return gradient\n",
	" \n",
	" q = gradient\n",
	" start_loop = np.max([k-m, 0])\n",
	" loop_indices = range(start_loop, k)\n",
	" alphas = []\n",
	" \n",
	" for i in loop_indices[::-1]:\n",
	" s_i = S[i]\n",
	" y_i = Y[i]\n",
	" \n",
	" rho_i = 1/y_i.dot(s_i)\n",
	" alpha_i = rho_i * s_i.dot(q)\n",
	" alphas.append(alpha_i)\n",
	" q = q - alpha_i*y_i\n",
	" #print('q', q)\n",
	" \n",
	" #print('final q', q)\n",
	" \n",
	" alphas = alphas[::-1]\n",
	" #print('alpha', alphas)\n",
	" \n",
	" s_k1 = S[k-1]\n",
	" y_k1 = Y[k-1]\n",
	" \n",
	" #print('s_{k-1}', s_k1)\n",
	" #print('y_{k-1}', y_k1)\n",
	" \n",
	" gamma_k = s_k1.dot(y_k1) / y_k1.dot(y_k1)\n",
	" #print('gamma_k', gamma_k)\n",
	" r = gamma_k * q\n",
	" #print('r_0', r)\n",
	" \n",
	" for l, i in enumerate(loop_indices):\n",
	" s_i = S[i]\n",
	" y_i = Y[i]\n",
	" \n",
	" rho_i = 1/y_i.dot(s_i)\n",
	" alpha_i = alphas[l]\n",
	" beta = rho_i * y_i.dot(r)\n",
	" \n",
	" r = r + s_i*(alpha_i - beta)\n",
	" #print('r', r)\n",
	" \n",
	" return r\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 430,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"def two_loop_recursion(gradient, Y, S, k, apply_Hk0):\n",
	" \"\"\"\n",
	" Build the L-BFGS two loop recursion as described in:\n",
	" Nocedal, Wright: Numerical Optimization, Algorithm 7.4\n",
	" \"\"\"\n",
	" \n",
	" not_first_step = T.gt(k,0)\n",
	"\n",
	" #### First loop\n",
	" \n",
	" def f(i, q, alpha, Y, S):\n",
	" valid_step = T.ge(i, 0)\n",
	" #i = theano.printing.Print('i')(i)\n",
	" store_index = i % Y.shape[0]\n",
	" y = Y[store_index]\n",
	" s = S[store_index]\n",
	" \n",
	" #y = theano.printing.Print('y')(y)\n",
	" #s = theano.printing.Print('s')(s)\n",
	" \n",
	" rho = 1/y.dot(s)\n",
	" alpha = rho * s.dot(q)\n",
	" new_q = q - alpha*y \n",
	" #new_q = theano.printing.Print('q')(new_q)\n",
	" return ifelse(valid_step, new_q, q), ifelse(valid_step, alpha, np.array(0.0, dtype=theano.config.floatX))\n",
	"\n",
	" q3 = gradient\n",
	" alpha = T.zeros(())\n",
	" \n",
	" loop_indices = T.arange(k-Y.shape[0], k)\n",
	"\n",
	" (results, alpha), updates = theano.scan(f, sequences=[loop_indices],\n",
	" outputs_info=[q3, alpha],\n",
	" non_sequences=[Y,S],\n",
	" strict=True,\n",
	" go_backwards=True)\n",
	" alpha = alpha[::-1]\n",
	" results[-1]\n",
	" q3 = results[-1]\n",
	" #q3 = theano.printing.Print('final q')(q3)\n",
	"\n",
	" #### gamma_k = s_{k-1}^Ty_{k_1} / y_{k-1}^Ty_{k-1} (7.20)\n",
	" #\n",
	" #s = S[(k - 1) % Y.shape[0]]\n",
	" #y = Y[(k - 1) % Y.shape[0]]\n",
	" #\n",
	" #gamma_k = ifelse(T.gt(k,0),s.dot(y)/y.dot(y), np.array(1.0, dtype=theano.config.floatX))\n",
	" #\n",
	" #### r = H_k^0 q with H_k^0 = gamma_k\n",
	" \n",
	" #r = gamma_k*q3\n",
	" \n",
	" r = apply_Hk0(q3, S, Y)\n",
	" #r = theano.printing.Print('r_0')(r)\n",
	" \n",
	" #alpha = theano.printing.Print('alpha')(alpha)\n",
	" \n",
	" #### Second loop\n",
	"\n",
	" def f2(i, alpha_i, r, Y, S):\n",
	" valid_step = T.ge(i, 0)\n",
	" #i = theano.printing.Print('i')(i)\n",
	" \n",
	" \n",
	" store_index = i % Y.shape[0]\n",
	" y = Y[store_index]\n",
	" s = S[store_index]\n",
	" \n",
	" #y = theano.printing.Print('y')(y)\n",
	" #s = theano.printing.Print('s')(s)\n",
	" \n",
	" #alpha_i = alpha[i]\n",
	"\n",
	" rho = 1/y.dot(s)\n",
	"\n",
	" beta = rho * y.dot(r)\n",
	" new_r = r + s*(alpha_i - beta)\n",
	" #new_r = theano.printing.Print('r')(new_r)\n",
	" new_r = ifelse(valid_step, new_r, r)\n",
	" return new_r\n",
	"\n",
	" results, updates = theano.scan(f2, sequences=[loop_indices, alpha],\n",
	" outputs_info=[r],\n",
	" non_sequences=[Y,S],\n",
	" go_backwards=False,\n",
	" strict=True)\n",
	" r = results[-1]\n",
	" \n",
	" return r\n",
	" return q3, gamma_k, s, y, s.dot(y), y.dot(y), r"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 446,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def apply_Hk0(q, S, Y):\n",
	" \"\"\"\n",
	" H_k^0 q := gamma_k q with\n",
	" \n",
	" gamma_k = s_{k-1}^Ty_{k_1} / y_{k-1}^Ty_{k-1} (7.20)\n",
	" \n",
	" \"\"\"\n",
	" s = S[(k - 1) % Y.shape[0]]\n",
	" y = Y[(k - 1) % Y.shape[0]]\n",
	" \n",
	" #s = theano.printing.Print('s_{k-1}')(s)\n",
	" #y = theano.printing.Print('y_{k-1}')(y)\n",
	" \n",
	"\n",
	" gamma_k = ifelse(T.gt(k,0),s.dot(y)/y.dot(y), np.array(1.0, dtype=theano.config.floatX))\n",
	" #gamma_k = theano.printing.Print('gamma_k')(gamma_k)\n",
	" return gamma_k * q"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"We compare the theano implementation to the python implementation:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 260,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/usr/local/lib/python3.4/dist-packages/theano/scan_module/scan.py:1019: Warning: In the strict mode, all neccessary shared variables must be passed as a part of non_sequences\n",
	" 'must be passed as a part of non_sequences', Warning)\n"
	]
	}
	],
	"source": [
	"rr = two_loop_recursion(gradient, Y, S, k, apply_Hk0)\n",
	"f_theano = theano.function([], rr)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 261,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def f_python():\n",
	" gradient_value = gradient.eval()\n",
	" Ys = [Y.eval()[i % m] for i in range(k.eval())]\n",
	" Ss = [S.eval()[i % m] for i in range(k.eval())]\n",
	" r = python_two_loop_recursion(gradient.eval(), Ys, Ss, k.eval(), m)\n",
	" return r"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 217,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"k.set_value(1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 269,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([ 0.02685671, -0.36139641, -0.4481632 ])"
	]
	},
	"execution_count": 269,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"f_theano()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 270,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([ 0.02685671, -0.36139641, -0.4481632 ])"
	]
	},
	"execution_count": 270,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"f_python()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# The BGFS iteration"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 434,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def python_bfgs_iteration(gradient_func, gradients, xs, Y, S, k, m):\n",
	" gradient_k = gradients[k]\n",
	" x_k = xs[k]\n",
	" p_k = - python_two_loop_recursion(gradient_k, Y, S, k, m)\n",
	" #print(p_k)\n",
	" x_k1 = xs[k] + p_k\n",
	" s_k = x_k1 - x_k\n",
	" gradient_k = gradient_func(x_k)\n",
	" gradient_k1 = gradient_func(x_k1) \n",
	" y_k = gradient_k1 - gradient_k\n",
	" \n",
	" gradients.append(gradient_k1)\n",
	" xs.append(x_k1)\n",
	" Y.append(y_k)\n",
	" S.append(s_k)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": 426,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def gradient_func(xx):\n",
	" #print('set')\n",
	" x.set_value(xx)\n",
	" return gradient.eval()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 132,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"x_0 = np.random.randn(N)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 456,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"xs = [x_0]\n",
	"gradients = [gradient_func(x_0)]\n",
	"Ys = []\n",
	"Ss = []\n",
	"kk = 0"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 457,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" 0 \|x\|=2.60976627311797 f(x)=0.5465937982294136\n",
	" 1 \|x\|=0.702598305133158 f(x)=0.1202228141546399\n",
	" 2 \|x\|=0.06887808876605656 f(x)=0.008279923286513797\n",
	" 3 \|x\|=0.009963579722922153 f(x)=0.0008906010431048172\n",
	" 4 \|x\|=0.0012511212735576796 f(x)=8.145131497737265e-05\n",
	" 5 \|x\|=0.00016472052711682608 f(x)=7.881494947743876e-06\n",
	" 6 \|x\|=2.1215414323718854e-05 f(x)=7.449048294397669e-07\n",
	" 7 \|x\|=2.7477632667395476e-06 f(x)=7.09333781822866e-08\n",
	" 8 \|x\|=3.547639407010765e-07 f(x)=6.733865200852477e-09\n",
	" 9 \|x\|=4.583598917870091e-08 f(x)=6.39939485738033e-10\n",
	" 10 \|x\|=5.919433633207291e-09 f(x)=6.079082487230461e-11\n",
	" 11 \|x\|=7.645240393773708e-10 f(x)=5.775645207046635e-12\n",
	" 12 \|x\|=9.873568614933486e-11 f(x)=5.487057176923128e-13\n",
	" 13 \|x\|=1.2751503804021772e-11 f(x)=5.2129920504843006e-14\n",
	" 14 \|x\|=1.6468139161101762e-12 f(x)=4.952579684696765e-15\n",
	" 15 \|x\|=2.1268069694461033e-13 f(x)=4.705188688880147e-16\n",
	" 16 \|x\|=2.7466985454931896e-14 f(x)=4.4701509509822754e-17\n",
	" 17 \|x\|=3.5472675767511894e-15 f(x)=4.24685556324816e-18\n",
	" 18 \|x\|=4.581174184375384e-16 f(x)=4.034713809881591e-19\n",
	" 19 \|x\|=5.916429023998802e-17 f(x)=3.8331692900286005e-20\n",
	" 20 \|x\|=7.64086449420329e-18 f(x)=3.641692381183098e-21\n",
	" 21 \|x\|=9.867913538888078e-19 f(x)=3.4597802714757814e-22\n",
	" 22 \|x\|=1.274407066453028e-19 f(x)=3.286955141967377e-23\n",
	" 23 \|x\|=1.6458528577560204e-20 f(x)=3.1227630852848113e-24\n",
	" 24 \|x\|=2.1255623090194172e-21 f(x)=2.966772852816304e-25\n",
	" 25 \|x\|=2.74509055138786e-22 f(x)=2.8185747432287323e-26\n",
	" 26 \|x\|=3.545189949154044e-23 f(x)=2.6777795190059003e-27\n",
	" 27 \|x\|=4.578490778355825e-24 f(x)=2.544017386713168e-28\n",
	" 28 \|x\|=5.912963228411339e-25 f(x)=2.4169370248461475e-29\n",
	" 29 \|x\|=7.636388459209945e-26 f(x)=2.2962046614124233e-30\n",
	" 30 \|x\|=9.862132816860661e-27 f(x)=2.181503197181356e-31\n",
	" 31 \|x\|=1.2736605034813096e-27 f(x)=2.072531372872752e-32\n",
	" 32 \|x\|=1.644888695226584e-28 f(x)=1.969002977896736e-33\n",
	" 33 \|x\|=2.1243171255516505e-29 f(x)=1.8706460986366015e-34\n",
	" 34 \|x\|=2.743482439272401e-30 f(x)=1.7772024042757962e-35\n",
	" 35 \|x\|=3.5431131275380794e-31 f(x)=1.6884264683018835e-36\n",
	" 36 \|x\|=4.5758086346132866e-32 f(x)=1.6040851239023861e-37\n",
	" 37 \|x\|=5.90949933206063e-33 f(x)=1.5239568515606112e-38\n",
	" 38 \|x\|=7.631914956289756e-34 f(x)=1.4478311972425314e-39\n",
	" 39 \|x\|=9.856355441828773e-35 f(x)=1.3755082196468417e-40\n",
	" 40 \|x\|=1.2729143753836577e-35 f(x)=1.3067979650662867e-41\n",
	" 41 \|x\|=1.6439250964733183e-36 f(x)=1.2415199684810618e-42\n",
	" 42 \|x\|=2.1230726709330098e-37 f(x)=1.1795027795739103e-43\n",
	" 43 \|x\|=2.741875268972016e-38 f(x)=1.1205835124220183e-44\n",
	" 44 \|x\|=3.541037522421003e-39 f(x)=1.0646074176829714e-45\n",
	" 45 \|x\|=4.573128062056071e-40 f(x)=1.0114274761511596e-46\n",
	" 46 \|x\|=5.906037464880122e-41 f(x)=9.609040126171088e-48\n",
	" 47 \|x\|=7.627444073998894e-42 f(x)=9.129043290155451e-49\n",
	" 48 \|x\|=9.850581451257635e-43 f(x)=8.673023558986891e-50\n",
	" 49 \|x\|=1.2721686843780192e-43 f(x)=8.23978321319371e-51\n",
	" 50 \|x\|=1.6429620622095138e-44 f(x)=7.828184362543092e-52\n"
	]
	}
	],
	"source": [
	"x_dist = np.square(xs[-1]).sum()\n",
	"print('{:3} \|x\|={} f(x)={}'.format(int(kk), x_dist, y.eval()))\n",
	"\n",
	"\n",
	"for i in range(50):\n",
	" #print('Y', Ys)\n",
	" #print('S', Ss)\n",
	" python_bfgs_iteration(gradient_func, gradients, xs, Ys, Ss, kk, m)\n",
	" x_dist = np.square(xs[-1]).sum()\n",
	" #x.set_value(xs[-1])\n",
	" kk += 1\n",
	" print('{:3} \|x\|={} f(x)={}'.format(int(kk), x_dist, y.eval()))\n",
	" \n",
	"\n",
	" "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 458,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def bfgs_update(gradient, x, Y, S, k):\n",
	" \"\"\"Builds the updates for the L-BGFS algorithm as described in:\n",
	" Nocedal, Wright: Numerical Optimization, Algorithm 7.5\"\"\"\n",
	" not_first_step = T.gt(k,0)\n",
	" \n",
	" ## Do the update which should have been done at the end of the last loop now where we have\n",
	" ## what used to be \\nabla f_{k+1}\n",
	" \n",
	" s_k = x - last_x\n",
	" y_k = gradient - last_gradient\n",
	" \n",
	" store_index = (k - 1) % Y.shape[0]\n",
	" Y_new = ifelse(not_first_step, T.set_subtensor(Y[store_index], y_k), Y)\n",
	" S_new = ifelse(not_first_step, T.set_subtensor(S[store_index], s_k), S)\n",
	" \n",
	" #Y_new = theano.printing.Print('Y')(Y_new)\n",
	" #S_new = theano.printing.Print('S')(S_new)\n",
	" \n",
	" \n",
	" p_k = - two_loop_recursion(gradient, Y_new,S_new, k, apply_Hk0)\n",
	" \n",
	" \n",
	" #p_k = theano.printing.Print('p_k')(p_k)\n",
	" #kk = theano.printing.Print('k')(k)\n",
	" kk = k\n",
	" new_k = kk+1\n",
	" \n",
	" \n",
	" x_k1 = x + p_k\n",
	" updates = [(x, x_k1),\n",
	" (last_gradient, gradient),\n",
	" (last_x, x),\n",
	" (Y, Y_new),\n",
	" (S, S_new),\n",
	" (k, new_k)]\n",
	" return updates\n",
	" "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 459,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"N = 3\n",
	"m = 20"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 460,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"k = theano.shared(0)\n",
	"last_x = theano.shared(np.random.randn(N))\n",
	"last_gradient = theano.shared(np.random.randn(N))\n",
	"S = theano.shared(np.random.standard_normal((m, N)), name='S')\n",
	"Y = theano.shared(np.random.standard_normal((m, N)), name='Y')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 461,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"x = theano.shared(x_0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 462,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"y = 0.2(T.abs_(x)*2.3).sum()\n",
	"gradient = T.grad(y, x)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 463,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/usr/local/lib/python3.4/dist-packages/theano/scan_module/scan.py:1019: Warning: In the strict mode, all neccessary shared variables must be passed as a part of non_sequences\n",
	" 'must be passed as a part of non_sequences', Warning)\n"
	]
	}
	],
	"source": [
	"updates = bfgs_update(gradient, x, Y, S, k)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 464,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"f_bfgs = theano.function([], y, updates=updates)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 465,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" 0 \|x\|=2.60976627311797 f(x)=0.5465937982294136\n",
	" 1 \|x\|=0.702598305133158 f(x)=0.1202228141546399\n",
	" 2 \|x\|=0.06887808876605656 f(x)=0.008279923286513797\n",
	" 3 \|x\|=0.009963579722922153 f(x)=0.0008906010431048172\n",
	" 4 \|x\|=0.0012511212735576791 f(x)=8.145131497737262e-05\n",
	" 5 \|x\|=0.000164720527116826 f(x)=7.881494947743873e-06\n",
	" 6 \|x\|=2.121541432371882e-05 f(x)=7.449048294397654e-07\n",
	" 7 \|x\|=2.7477632667395425e-06 f(x)=7.093337818228644e-08\n",
	" 8 \|x\|=3.5476394070107576e-07 f(x)=6.733865200852461e-09\n",
	" 9 \|x\|=4.5835989178700805e-08 f(x)=6.399394857380312e-10\n",
	" 10 \|x\|=5.919433633207281e-09 f(x)=6.079082487230449e-11\n",
	" 11 \|x\|=7.645240393773698e-10 f(x)=5.775645207046627e-12\n",
	" 12 \|x\|=9.873568614933473e-11 f(x)=5.487057176923119e-13\n",
	" 13 \|x\|=1.2751503804021746e-11 f(x)=5.2129920504842886e-14\n",
	" 14 \|x\|=1.646813916110173e-12 f(x)=4.952579684696753e-15\n",
	" 15 \|x\|=2.126806969446094e-13 f(x)=4.705188688880122e-16\n",
	" 16 \|x\|=2.746698545493183e-14 f(x)=4.4701509509822625e-17\n",
	" 17 \|x\|=3.5472675767511807e-15 f(x)=4.2468555632481475e-18\n",
	" 18 \|x\|=4.581174184375375e-16 f(x)=4.034713809881582e-19\n",
	" 19 \|x\|=5.916429023998782e-17 f(x)=3.8331692900285855e-20\n",
	" 20 \|x\|=7.640864494203281e-18 f(x)=3.6416923811830926e-21\n",
	" 21 \|x\|=9.86791353888805e-19 f(x)=3.45978027147577e-22\n",
	" 22 \|x\|=1.2744070664530268e-19 f(x)=3.2869551419673724e-23\n",
	" 23 \|x\|=1.6458528577560162e-20 f(x)=3.122763085284802e-24\n",
	" 24 \|x\|=2.1255623090194112e-21 f(x)=2.9667728528162933e-25\n",
	" 25 \|x\|=2.7450905513878517e-22 f(x)=2.818574743228722e-26\n",
	" 26 \|x\|=3.545189949154034e-23 f(x)=2.6777795190058913e-27\n",
	" 27 \|x\|=4.578490778355809e-24 f(x)=2.544017386713158e-28\n",
	" 28 \|x\|=5.912963228411321e-25 f(x)=2.416937024846139e-29\n",
	" 29 \|x\|=7.636388459209922e-26 f(x)=2.2962046614124146e-30\n",
	" 30 \|x\|=9.862132816860626e-27 f(x)=2.181503197181347e-31\n",
	" 31 \|x\|=1.2736605034813046e-27 f(x)=2.072531372872742e-32\n",
	" 32 \|x\|=1.6448886952265766e-28 f(x)=1.9690029778967255e-33\n",
	" 33 \|x\|=2.1243171255516384e-29 f(x)=1.8706460986365884e-34\n",
	" 34 \|x\|=2.743482439272389e-30 f(x)=1.7772024042757868e-35\n",
	" 35 \|x\|=3.5431131275380575e-31 f(x)=1.6884264683018715e-36\n",
	" 36 \|x\|=4.5758086346132647e-32 f(x)=1.6040851239023767e-37\n",
	" 37 \|x\|=5.9094993320605904e-33 f(x)=1.5239568515605992e-38\n",
	" 38 \|x\|=7.631914956289707e-34 f(x)=1.4478311972425201e-39\n",
	" 39 \|x\|=9.856355441828715e-35 f(x)=1.3755082196468319e-40\n",
	" 40 \|x\|=1.2729143753836495e-35 f(x)=1.3067979650662765e-41\n",
	" 41 \|x\|=1.643925096473307e-36 f(x)=1.2415199684810516e-42\n",
	" 42 \|x\|=2.1230726709329943e-37 f(x)=1.1795027795739002e-43\n",
	" 43 \|x\|=2.741875268971997e-38 f(x)=1.1205835124220091e-44\n",
	" 44 \|x\|=3.541037522420973e-39 f(x)=1.0646074176829608e-45\n",
	" 45 \|x\|=4.573128062056035e-40 f(x)=1.0114274761511503e-46\n",
	" 46 \|x\|=5.906037464880071e-41 f(x)=9.609040126170988e-48\n",
	" 47 \|x\|=7.627444073998833e-42 f(x)=9.129043290155365e-49\n",
	" 48 \|x\|=9.85058145125755e-43 f(x)=8.6730235589868e-50\n",
	" 49 \|x\|=1.2721686843780113e-43 f(x)=8.239783213193649e-51\n"
	]
	}
	],
	"source": [
	"#x_dist = np.square(x.get_value()).sum()\n",
	"#print('{:3} \|x\|={} f(x)={}'.format(int(k.get_value()), x_dist, y.eval()))\n",
	"for i in range(50):\n",
	" x_dist = np.square(x.get_value()).sum()\n",
	" value = f_bfgs()\n",
	" print('{:3} \|x\|={} f(x)={}'.format(int(k.get_value()-1), x_dist, value))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.4.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}
No results found