optimization.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102
  1. from __future__ import print_function
  2. from copy import copy
  3. from ..libmp.backend import xrange
class OptimizationMethods(object):
    # Mixin class: the solver entry points (jacobian, findroot, multiplicity)
    # are attached to it at the bottom of this module.  Note that the first
    # parameter is the mpmath context itself, which acts as `self`.
    def __init__(ctx):
        pass
  7. ##############
  8. # 1D-SOLVERS #
  9. ##############
  10. class Newton:
  11. """
  12. 1d-solver generating pairs of approximative root and error.
  13. Needs starting points x0 close to the root.
  14. Pro:
  15. * converges fast
  16. * sometimes more robust than secant with bad second starting point
  17. Contra:
  18. * converges slowly for multiple roots
  19. * needs first derivative
  20. * 2 function evaluations per iteration
  21. """
  22. maxsteps = 20
  23. def __init__(self, ctx, f, x0, **kwargs):
  24. self.ctx = ctx
  25. if len(x0) == 1:
  26. self.x0 = x0[0]
  27. else:
  28. raise ValueError('expected 1 starting point, got %i' % len(x0))
  29. self.f = f
  30. if not 'df' in kwargs:
  31. def df(x):
  32. return self.ctx.diff(f, x)
  33. else:
  34. df = kwargs['df']
  35. self.df = df
  36. def __iter__(self):
  37. f = self.f
  38. df = self.df
  39. x0 = self.x0
  40. while True:
  41. x1 = x0 - f(x0) / df(x0)
  42. error = abs(x1 - x0)
  43. x0 = x1
  44. yield (x1, error)
  45. class Secant:
  46. """
  47. 1d-solver generating pairs of approximative root and error.
  48. Needs starting points x0 and x1 close to the root.
  49. x1 defaults to x0 + 0.25.
  50. Pro:
  51. * converges fast
  52. Contra:
  53. * converges slowly for multiple roots
  54. """
  55. maxsteps = 30
  56. def __init__(self, ctx, f, x0, **kwargs):
  57. self.ctx = ctx
  58. if len(x0) == 1:
  59. self.x0 = x0[0]
  60. self.x1 = self.x0 + 0.25
  61. elif len(x0) == 2:
  62. self.x0 = x0[0]
  63. self.x1 = x0[1]
  64. else:
  65. raise ValueError('expected 1 or 2 starting points, got %i' % len(x0))
  66. self.f = f
  67. def __iter__(self):
  68. f = self.f
  69. x0 = self.x0
  70. x1 = self.x1
  71. f0 = f(x0)
  72. while True:
  73. f1 = f(x1)
  74. l = x1 - x0
  75. if not l:
  76. break
  77. s = (f1 - f0) / l
  78. if not s:
  79. break
  80. x0, x1 = x1, x1 - f1/s
  81. f0 = f1
  82. yield x1, abs(l)
  83. class MNewton:
  84. """
  85. 1d-solver generating pairs of approximative root and error.
  86. Needs starting point x0 close to the root.
  87. Uses modified Newton's method that converges fast regardless of the
  88. multiplicity of the root.
  89. Pro:
  90. * converges fast for multiple roots
  91. Contra:
  92. * needs first and second derivative of f
  93. * 3 function evaluations per iteration
  94. """
  95. maxsteps = 20
  96. def __init__(self, ctx, f, x0, **kwargs):
  97. self.ctx = ctx
  98. if not len(x0) == 1:
  99. raise ValueError('expected 1 starting point, got %i' % len(x0))
  100. self.x0 = x0[0]
  101. self.f = f
  102. if not 'df' in kwargs:
  103. def df(x):
  104. return self.ctx.diff(f, x)
  105. else:
  106. df = kwargs['df']
  107. self.df = df
  108. if not 'd2f' in kwargs:
  109. def d2f(x):
  110. return self.ctx.diff(df, x)
  111. else:
  112. d2f = kwargs['df']
  113. self.d2f = d2f
  114. def __iter__(self):
  115. x = self.x0
  116. f = self.f
  117. df = self.df
  118. d2f = self.d2f
  119. while True:
  120. prevx = x
  121. fx = f(x)
  122. if fx == 0:
  123. break
  124. dfx = df(x)
  125. d2fx = d2f(x)
  126. # x = x - F(x)/F'(x) with F(x) = f(x)/f'(x)
  127. x -= fx / (dfx - fx * d2fx / dfx)
  128. error = abs(x - prevx)
  129. yield x, error
  130. class Halley:
  131. """
  132. 1d-solver generating pairs of approximative root and error.
  133. Needs a starting point x0 close to the root.
  134. Uses Halley's method with cubic convergence rate.
  135. Pro:
  136. * converges even faster the Newton's method
  137. * useful when computing with *many* digits
  138. Contra:
  139. * needs first and second derivative of f
  140. * 3 function evaluations per iteration
  141. * converges slowly for multiple roots
  142. """
  143. maxsteps = 20
  144. def __init__(self, ctx, f, x0, **kwargs):
  145. self.ctx = ctx
  146. if not len(x0) == 1:
  147. raise ValueError('expected 1 starting point, got %i' % len(x0))
  148. self.x0 = x0[0]
  149. self.f = f
  150. if not 'df' in kwargs:
  151. def df(x):
  152. return self.ctx.diff(f, x)
  153. else:
  154. df = kwargs['df']
  155. self.df = df
  156. if not 'd2f' in kwargs:
  157. def d2f(x):
  158. return self.ctx.diff(df, x)
  159. else:
  160. d2f = kwargs['df']
  161. self.d2f = d2f
  162. def __iter__(self):
  163. x = self.x0
  164. f = self.f
  165. df = self.df
  166. d2f = self.d2f
  167. while True:
  168. prevx = x
  169. fx = f(x)
  170. dfx = df(x)
  171. d2fx = d2f(x)
  172. x -= 2*fx*dfx / (2*dfx**2 - fx*d2fx)
  173. error = abs(x - prevx)
  174. yield x, error
  175. class Muller:
  176. """
  177. 1d-solver generating pairs of approximative root and error.
  178. Needs starting points x0, x1 and x2 close to the root.
  179. x1 defaults to x0 + 0.25; x2 to x1 + 0.25.
  180. Uses Muller's method that converges towards complex roots.
  181. Pro:
  182. * converges fast (somewhat faster than secant)
  183. * can find complex roots
  184. Contra:
  185. * converges slowly for multiple roots
  186. * may have complex values for real starting points and real roots
  187. http://en.wikipedia.org/wiki/Muller's_method
  188. """
  189. maxsteps = 30
  190. def __init__(self, ctx, f, x0, **kwargs):
  191. self.ctx = ctx
  192. if len(x0) == 1:
  193. self.x0 = x0[0]
  194. self.x1 = self.x0 + 0.25
  195. self.x2 = self.x1 + 0.25
  196. elif len(x0) == 2:
  197. self.x0 = x0[0]
  198. self.x1 = x0[1]
  199. self.x2 = self.x1 + 0.25
  200. elif len(x0) == 3:
  201. self.x0 = x0[0]
  202. self.x1 = x0[1]
  203. self.x2 = x0[2]
  204. else:
  205. raise ValueError('expected 1, 2 or 3 starting points, got %i'
  206. % len(x0))
  207. self.f = f
  208. self.verbose = kwargs['verbose']
  209. def __iter__(self):
  210. f = self.f
  211. x0 = self.x0
  212. x1 = self.x1
  213. x2 = self.x2
  214. fx0 = f(x0)
  215. fx1 = f(x1)
  216. fx2 = f(x2)
  217. while True:
  218. # TODO: maybe refactoring with function for divided differences
  219. # calculate divided differences
  220. fx2x1 = (fx1 - fx2) / (x1 - x2)
  221. fx2x0 = (fx0 - fx2) / (x0 - x2)
  222. fx1x0 = (fx0 - fx1) / (x0 - x1)
  223. w = fx2x1 + fx2x0 - fx1x0
  224. fx2x1x0 = (fx1x0 - fx2x1) / (x0 - x2)
  225. if w == 0 and fx2x1x0 == 0:
  226. if self.verbose:
  227. print('canceled with')
  228. print('x0 =', x0, ', x1 =', x1, 'and x2 =', x2)
  229. break
  230. x0 = x1
  231. fx0 = fx1
  232. x1 = x2
  233. fx1 = fx2
  234. # denominator should be as large as possible => choose sign
  235. r = self.ctx.sqrt(w**2 - 4*fx2*fx2x1x0)
  236. if abs(w - r) > abs(w + r):
  237. r = -r
  238. x2 -= 2*fx2 / (w + r)
  239. fx2 = f(x2)
  240. error = abs(x2 - x1)
  241. yield x2, error
  242. # TODO: consider raising a ValueError when there's no sign change in a and b
  243. class Bisection:
  244. """
  245. 1d-solver generating pairs of approximative root and error.
  246. Uses bisection method to find a root of f in [a, b].
  247. Might fail for multiple roots (needs sign change).
  248. Pro:
  249. * robust and reliable
  250. Contra:
  251. * converges slowly
  252. * needs sign change
  253. """
  254. maxsteps = 100
  255. def __init__(self, ctx, f, x0, **kwargs):
  256. self.ctx = ctx
  257. if len(x0) != 2:
  258. raise ValueError('expected interval of 2 points, got %i' % len(x0))
  259. self.f = f
  260. self.a = x0[0]
  261. self.b = x0[1]
  262. def __iter__(self):
  263. f = self.f
  264. a = self.a
  265. b = self.b
  266. l = b - a
  267. fb = f(b)
  268. while True:
  269. m = self.ctx.ldexp(a + b, -1)
  270. fm = f(m)
  271. sign = fm * fb
  272. if sign < 0:
  273. a = m
  274. elif sign > 0:
  275. b = m
  276. fb = fm
  277. else:
  278. yield m, self.ctx.zero
  279. l /= 2
  280. yield (a + b)/2, abs(l)
  281. def _getm(method):
  282. """
  283. Return a function to calculate m for Illinois-like methods.
  284. """
  285. if method == 'illinois':
  286. def getm(fz, fb):
  287. return 0.5
  288. elif method == 'pegasus':
  289. def getm(fz, fb):
  290. return fb/(fb + fz)
  291. elif method == 'anderson':
  292. def getm(fz, fb):
  293. m = 1 - fz/fb
  294. if m > 0:
  295. return m
  296. else:
  297. return 0.5
  298. else:
  299. raise ValueError("method '%s' not recognized" % method)
  300. return getm
  301. class Illinois:
  302. """
  303. 1d-solver generating pairs of approximative root and error.
  304. Uses Illinois method or similar to find a root of f in [a, b].
  305. Might fail for multiple roots (needs sign change).
  306. Combines bisect with secant (improved regula falsi).
  307. The only difference between the methods is the scaling factor m, which is
  308. used to ensure convergence (you can choose one using the 'method' keyword):
  309. Illinois method ('illinois'):
  310. m = 0.5
  311. Pegasus method ('pegasus'):
  312. m = fb/(fb + fz)
  313. Anderson-Bjoerk method ('anderson'):
  314. m = 1 - fz/fb if positive else 0.5
  315. Pro:
  316. * converges very fast
  317. Contra:
  318. * has problems with multiple roots
  319. * needs sign change
  320. """
  321. maxsteps = 30
  322. def __init__(self, ctx, f, x0, **kwargs):
  323. self.ctx = ctx
  324. if len(x0) != 2:
  325. raise ValueError('expected interval of 2 points, got %i' % len(x0))
  326. self.a = x0[0]
  327. self.b = x0[1]
  328. self.f = f
  329. self.tol = kwargs['tol']
  330. self.verbose = kwargs['verbose']
  331. self.method = kwargs.get('method', 'illinois')
  332. self.getm = _getm(self.method)
  333. if self.verbose:
  334. print('using %s method' % self.method)
  335. def __iter__(self):
  336. method = self.method
  337. f = self.f
  338. a = self.a
  339. b = self.b
  340. fa = f(a)
  341. fb = f(b)
  342. m = None
  343. while True:
  344. l = b - a
  345. if l == 0:
  346. break
  347. s = (fb - fa) / l
  348. z = a - fa/s
  349. fz = f(z)
  350. if abs(fz) < self.tol:
  351. # TODO: better condition (when f is very flat)
  352. if self.verbose:
  353. print('canceled with z =', z)
  354. yield z, l
  355. break
  356. if fz * fb < 0: # root in [z, b]
  357. a = b
  358. fa = fb
  359. b = z
  360. fb = fz
  361. else: # root in [a, z]
  362. m = self.getm(fz, fb)
  363. b = z
  364. fb = fz
  365. fa = m*fa # scale down to ensure convergence
  366. if self.verbose and m and not method == 'illinois':
  367. print('m:', m)
  368. yield (a + b)/2, abs(l)
def Pegasus(*args, **kwargs):
    """
    1d-solver generating pairs of approximative root and error.

    Uses Pegasus method to find a root of f in [a, b].
    Wrapper for illinois to use method='pegasus'.
    """
    # Illinois with scaling factor m = fb/(fb + fz), see _getm()
    kwargs['method'] = 'pegasus'
    return Illinois(*args, **kwargs)
  377. def Anderson(*args, **kwargs):
  378. """
  379. 1d-solver generating pairs of approximative root and error.
  380. Uses Anderson-Bjoerk method to find a root of f in [a, b].
  381. Wrapper for illinois to use method='pegasus'.
  382. """
  383. kwargs['method'] = 'anderson'
  384. return Illinois(*args, **kwargs)
  385. # TODO: check whether it's possible to combine it with Illinois stuff
  386. class Ridder:
  387. """
  388. 1d-solver generating pairs of approximative root and error.
  389. Ridders' method to find a root of f in [a, b].
  390. Is told to perform as well as Brent's method while being simpler.
  391. Pro:
  392. * very fast
  393. * simpler than Brent's method
  394. Contra:
  395. * two function evaluations per step
  396. * has problems with multiple roots
  397. * needs sign change
  398. http://en.wikipedia.org/wiki/Ridders'_method
  399. """
  400. maxsteps = 30
  401. def __init__(self, ctx, f, x0, **kwargs):
  402. self.ctx = ctx
  403. self.f = f
  404. if len(x0) != 2:
  405. raise ValueError('expected interval of 2 points, got %i' % len(x0))
  406. self.x1 = x0[0]
  407. self.x2 = x0[1]
  408. self.verbose = kwargs['verbose']
  409. self.tol = kwargs['tol']
  410. def __iter__(self):
  411. ctx = self.ctx
  412. f = self.f
  413. x1 = self.x1
  414. fx1 = f(x1)
  415. x2 = self.x2
  416. fx2 = f(x2)
  417. while True:
  418. x3 = 0.5*(x1 + x2)
  419. fx3 = f(x3)
  420. x4 = x3 + (x3 - x1) * ctx.sign(fx1 - fx2) * fx3 / ctx.sqrt(fx3**2 - fx1*fx2)
  421. fx4 = f(x4)
  422. if abs(fx4) < self.tol:
  423. # TODO: better condition (when f is very flat)
  424. if self.verbose:
  425. print('canceled with f(x4) =', fx4)
  426. yield x4, abs(x1 - x2)
  427. break
  428. if fx4 * fx2 < 0: # root in [x4, x2]
  429. x1 = x4
  430. fx1 = fx4
  431. else: # root in [x1, x4]
  432. x2 = x4
  433. fx2 = fx4
  434. error = abs(x1 - x2)
  435. yield (x1 + x2)/2, error
class ANewton:
    """
    EXPERIMENTAL 1d-solver generating pairs of approximative root and error.

    Uses Newton's method modified to use Steffensens method when convergence is
    slow. (I.e. for multiple roots.)
    """
    maxsteps = 20

    def __init__(self, ctx, f, x0, **kwargs):
        self.ctx = ctx
        if not len(x0) == 1:
            raise ValueError('expected 1 starting point, got %i' % len(x0))
        self.x0 = x0[0]
        self.f = f
        if not 'df' in kwargs:
            # no analytic derivative supplied: differentiate numerically
            def df(x):
                return self.ctx.diff(f, x)
        else:
            df = kwargs['df']
        self.df = df
        # fixpoint function of plain Newton's method; a root of f is a
        # fixpoint of phi
        def phi(x):
            return x - f(x) / df(x)
        self.phi = phi
        self.verbose = kwargs['verbose']

    def __iter__(self):
        x0 = self.x0
        f = self.f
        df = self.df
        phi = self.phi
        error = 0
        counter = 0
        while True:
            prevx = x0
            try:
                x0 = phi(x0)
            except ZeroDivisionError:
                # df(x0) == 0, typically because we hit the root exactly
                if self.verbose:
                    print('ZeroDivisionError: canceled with x =', x0)
                break
            preverror = error
            error = abs(prevx - x0)
            # TODO: decide not to use convergence acceleration
            # heuristic: consecutive errors of comparable size indicate
            # merely linear convergence (as for multiple roots)
            if error and abs(error - preverror) / error < 1:
                if self.verbose:
                    print('converging slowly')
                counter += 1
            if counter >= 3:
                # accelerate convergence by applying Steffensen's method
                # to the fixpoint function (see steffensen() below)
                phi = steffensen(phi)
                counter = 0
                if self.verbose:
                    print('accelerating convergence')
            yield x0, error
  488. # TODO: add Brent
  489. ############################
  490. # MULTIDIMENSIONAL SOLVERS #
  491. ############################
  492. def jacobian(ctx, f, x):
  493. """
  494. Calculate the Jacobian matrix of a function at the point x0.
  495. This is the first derivative of a vectorial function:
  496. f : R^m -> R^n with m >= n
  497. """
  498. x = ctx.matrix(x)
  499. h = ctx.sqrt(ctx.eps)
  500. fx = ctx.matrix(f(*x))
  501. m = len(fx)
  502. n = len(x)
  503. J = ctx.matrix(m, n)
  504. for j in xrange(n):
  505. xj = x.copy()
  506. xj[j] += h
  507. Jj = (ctx.matrix(f(*xj)) - fx) / h
  508. for i in xrange(m):
  509. J[i,j] = Jj[i]
  510. return J
# TODO: test with user-specified jacobian matrix
class MDNewton:
    """
    Find the root of a vector function numerically using Newton's method.

    f is a vector function representing a nonlinear equation system.
    x0 is the starting point close to the root.
    J is a function returning the Jacobian matrix for a point.

    Supports overdetermined systems.

    Use the 'norm' keyword to specify which norm to use. Defaults to max-norm.
    The function to calculate the Jacobian matrix can be given using the
    keyword 'J'. Otherwise it will be calculated numerically.

    Please note that this method converges only locally. Especially for high-
    dimensional systems it is not trivial to find a good starting point being
    close enough to the root.

    It is recommended to use a faster, low-precision solver from SciPy [1] or
    OpenOpt [2] to get an initial guess. Afterwards you can use this method for
    root-polishing to any precision.

    [1] http://scipy.org

    [2] http://openopt.org/Welcome
    """
    maxsteps = 10

    def __init__(self, ctx, f, x0, **kwargs):
        self.ctx = ctx
        self.f = f
        if isinstance(x0, (tuple, list)):
            x0 = ctx.matrix(x0)
        assert x0.cols == 1, 'need a vector'
        self.x0 = x0
        if 'J' in kwargs:
            self.J = kwargs['J']
        else:
            # no Jacobian supplied: approximate it numerically
            def J(*x):
                return ctx.jacobian(f, x)
            self.J = J
        self.norm = kwargs['norm']
        self.verbose = kwargs['verbose']

    def __iter__(self):
        f = self.f
        x0 = self.x0
        norm = self.norm
        J = self.J
        fx = self.ctx.matrix(f(*x0))
        fxnorm = norm(fx)
        cancel = False
        while not cancel:
            # get direction of descent
            fxn = -fx
            Jx = J(*x0)
            # solve J(x0) * s = -f(x0) for the Newton step s
            s = self.ctx.lu_solve(Jx, fxn)
            if self.verbose:
                print('Jx:')
                print(Jx)
                print('s:', s)
            # damping step size TODO: better strategy (hard task)
            l = self.ctx.one
            x1 = x0 + s
            while True:
                if x1 == x0:
                    # step has been damped to nothing at the current
                    # precision: stop after this yield
                    if self.verbose:
                        print("canceled, won't get more excact")
                    cancel = True
                    break
                fx = self.ctx.matrix(f(*x1))
                newnorm = norm(fx)
                if newnorm < fxnorm:
                    # new x accepted
                    fxnorm = newnorm
                    x0 = x1
                    break
                # residual grew: halve the step and retry
                l /= 2
                x1 = x0 + l*s
            yield (x0, fxnorm)
#############
# UTILITIES #
#############

# Map of the string aliases accepted by findroot() to their solver classes.
str2solver = {'newton':Newton, 'secant':Secant, 'mnewton':MNewton,
              'halley':Halley, 'muller':Muller, 'bisect':Bisection,
              'illinois':Illinois, 'pegasus':Pegasus, 'anderson':Anderson,
              'ridder':Ridder, 'anewton':ANewton, 'mdnewton':MDNewton}
def findroot(ctx, f, x0, solver='secant', tol=None, verbose=False, verify=True, **kwargs):
    r"""
    Find an approximate solution to `f(x) = 0`, using *x0* as starting point or
    interval for *x*.

    Multidimensional overdetermined systems are supported.
    You can specify them using a function or a list of functions.

    Mathematically speaking, this function returns `x` such that
    `|f(x)|^2 \leq \mathrm{tol}` is true within the current working precision.
    If the computed value does not meet this criterion, an exception is raised.
    This exception can be disabled with *verify=False*.

    For interval arithmetic (``iv.findroot()``), please note that
    the returned interval ``x`` is not guaranteed to contain `f(x)=0`!
    It is only some `x` for which `|f(x)|^2 \leq \mathrm{tol}` certainly holds
    regardless of numerical error. This may be improved in the future.

    **Arguments**

    *f*
        one dimensional function
    *x0*
        starting point, several starting points or interval (depends on solver)
    *tol*
        the returned solution has an error smaller than this
    *verbose*
        print additional information for each iteration if true
    *verify*
        verify the solution and raise a ValueError if `|f(x)|^2 > \mathrm{tol}`
    *solver*
        a generator for *f* and *x0* returning approximative solution and error
    *maxsteps*
        after how many steps the solver will cancel
    *df*
        first derivative of *f* (used by some solvers)
    *d2f*
        second derivative of *f* (used by some solvers)
    *multidimensional*
        force multidimensional solving
    *J*
        Jacobian matrix of *f* (used by multidimensional solvers)
    *norm*
        used vector norm (used by multidimensional solvers)

    solver has to be callable with ``(f, x0, **kwargs)`` and return an generator
    yielding pairs of approximative solution and estimated error (which is
    expected to be positive).
    You can use the following string aliases:
    'secant', 'mnewton', 'halley', 'muller', 'illinois', 'pegasus', 'anderson',
    'ridder', 'anewton', 'bisect'

    See mpmath.calculus.optimization for their documentation.

    **Examples**

    The function :func:`~mpmath.findroot` locates a root of a given function using the
    secant method by default. A simple example use of the secant method is to
    compute `\pi` as the root of `\sin x` closest to `x_0 = 3`::

        >>> from mpmath import *
        >>> mp.dps = 30; mp.pretty = True
        >>> findroot(sin, 3)
        3.14159265358979323846264338328

    The secant method can be used to find complex roots of analytic functions,
    although it must in that case generally be given a nonreal starting value
    (or else it will never leave the real line)::

        >>> mp.dps = 15
        >>> findroot(lambda x: x**3 + 2*x + 1, j)
        (0.226698825758202 + 1.46771150871022j)

    A nice application is to compute nontrivial roots of the Riemann zeta
    function with many digits (good initial values are needed for convergence)::

        >>> mp.dps = 30
        >>> findroot(zeta, 0.5+14j)
        (0.5 + 14.1347251417346937904572519836j)

    The secant method can also be used as an optimization algorithm, by passing
    it a derivative of a function. The following example locates the positive
    minimum of the gamma function::

        >>> mp.dps = 20
        >>> findroot(lambda x: diff(gamma, x), 1)
        1.4616321449683623413

    Finally, a useful application is to compute inverse functions, such as the
    Lambert W function which is the inverse of `w e^w`, given the first
    term of the solution's asymptotic expansion as the initial value. In basic
    cases, this gives identical results to mpmath's built-in ``lambertw``
    function::

        >>> def lambert(x):
        ...     return findroot(lambda w: w*exp(w) - x, log(1+x))
        ...
        >>> mp.dps = 15
        >>> lambert(1); lambertw(1)
        0.567143290409784
        0.567143290409784
        >>> lambert(1000); lambert(1000)
        5.2496028524016
        5.2496028524016

    Multidimensional functions are also supported::

        >>> f = [lambda x1, x2: x1**2 + x2,
        ...      lambda x1, x2: 5*x1**2 - 3*x1 + 2*x2 - 3]
        >>> findroot(f, (0, 0))
        [-0.618033988749895]
        [-0.381966011250105]
        >>> findroot(f, (10, 10))
        [ 1.61803398874989]
        [-2.61803398874989]

    You can verify this by solving the system manually.

    Please note that the following (more general) syntax also works::

        >>> def f(x1, x2):
        ...     return x1**2 + x2, 5*x1**2 - 3*x1 + 2*x2 - 3
        ...
        >>> findroot(f, (0, 0))
        [-0.618033988749895]
        [-0.381966011250105]

    **Multiple roots**

    For multiple roots all methods of the Newtonian family (including secant)
    converge slowly. Consider this example::

        >>> f = lambda x: (x - 1)**99
        >>> findroot(f, 0.9, verify=False)
        0.918073542444929

    Even for a very close starting point the secant method converges very
    slowly. Use ``verbose=True`` to illustrate this.

    It is possible to modify Newton's method to make it converge regardless of
    the root's multiplicity::

        >>> findroot(f, -10, solver='mnewton')
        1.0

    This variant uses the first and second derivative of the function, which is
    not very efficient.

    Alternatively you can use an experimental Newtonian solver that keeps track
    of the speed of convergence and accelerates it using Steffensen's method if
    necessary::

        >>> findroot(f, -10, solver='anewton', verbose=True)
        x: -9.88888888888888888889
        error: 0.111111111111111111111
        converging slowly
        x: -9.77890011223344556678
        error: 0.10998877665544332211
        converging slowly
        x: -9.67002233332199662166
        error: 0.108877778911448945119
        converging slowly
        accelerating convergence
        x: -9.5622443299551077669
        error: 0.107778003366888854764
        converging slowly
        x: 0.99999999999999999214
        error: 10.562244329955107759
        x: 1.0
        error: 7.8598304758094664213e-18
        ZeroDivisionError: canceled with x = 1.0
        1.0

    **Complex roots**

    For complex roots it's recommended to use Muller's method as it converges
    even for real starting points very fast::

        >>> findroot(lambda x: x**4 + x + 1, (0, 1, 2), solver='muller')
        (0.727136084491197 + 0.934099289460529j)

    **Intersection methods**

    When you need to find a root in a known interval, it's highly recommended to
    use an intersection-based solver like ``'anderson'`` or ``'ridder'``.
    Usually they converge faster and more reliable. They have however problems
    with multiple roots and usually need a sign change to find a root::

        >>> findroot(lambda x: x**3, (-1, 1), solver='anderson')
        0.0

    Be careful with symmetric functions::

        >>> findroot(lambda x: x**2, (-1, 1), solver='anderson') #doctest:+ELLIPSIS
        Traceback (most recent call last):
          ...
        ZeroDivisionError

    It fails even for better starting points, because there is no sign change::

        >>> findroot(lambda x: x**2, (-1, .5), solver='anderson')
        Traceback (most recent call last):
          ...
        ValueError: Could not find root within given tolerance. (1.0 > 2.16840434497100886801e-19)
        Try another starting point or tweak arguments.

    """
    prec = ctx.prec
    try:
        # work with some guard digits; the original precision is restored
        # in the finally clause below
        ctx.prec += 20

        # initialize arguments
        if tol is None:
            tol = ctx.eps * 2**10

        kwargs['verbose'] = kwargs.get('verbose', verbose)

        if 'd1f' in kwargs:
            # 'd1f' is accepted as an alias for 'df'
            kwargs['df'] = kwargs['d1f']

        kwargs['tol'] = tol
        if isinstance(x0, (list, tuple)):
            x0 = [ctx.convert(x) for x in x0]
        else:
            x0 = [ctx.convert(x0)]

        if isinstance(solver, str):
            try:
                solver = str2solver[solver]
            except KeyError:
                raise ValueError('could not recognize solver')

        # accept list of functions
        if isinstance(f, (list, tuple)):
            f2 = copy(f)
            def tmp(*args):
                return [fn(*args) for fn in f2]
            f = tmp

        # detect multidimensional functions
        try:
            fx = f(*x0)
            multidimensional = isinstance(fx, (list, tuple, ctx.matrix))
        except TypeError:
            # f rejected the unpacked arguments: treat it as 1-dimensional
            fx = f(x0[0])
            multidimensional = False
        if 'multidimensional' in kwargs:
            multidimensional = kwargs['multidimensional']
        if multidimensional:
            # only one multidimensional solver available at the moment
            solver = MDNewton
            if not 'norm' in kwargs:
                norm = lambda x: ctx.norm(x, 'inf')
                kwargs['norm'] = norm
            else:
                norm = kwargs['norm']
        else:
            norm = abs

        # happily return starting point if it's a root
        if norm(fx) == 0:
            if multidimensional:
                return ctx.matrix(x0)
            else:
                return x0[0]

        # use solver
        iterations = solver(ctx, f, x0, **kwargs)
        if 'maxsteps' in kwargs:
            maxsteps = kwargs['maxsteps']
        else:
            maxsteps = iterations.maxsteps
        i = 0
        for x, error in iterations:
            if verbose:
                print('x: ', x)
                print('error:', error)
            i += 1
            # relative error criterion; also stop after maxsteps iterations
            if error < tol * max(1, norm(x)) or i >= maxsteps:
                break
        else:
            # for-else: the solver's generator was exhausted without a break
            if not i:
                raise ValueError('Could not find root using the given solver.\n'
                                 'Try another starting point or tweak arguments.')
        if not isinstance(x, (list, tuple, ctx.matrix)):
            xl = [x]
        else:
            xl = x
        if verify and norm(f(*xl))**2 > tol: # TODO: better condition?
            raise ValueError('Could not find root within given tolerance. '
                             '(%s > %s)\n'
                             'Try another starting point or tweak arguments.'
                             % (norm(f(*xl))**2, tol))
        return x
    finally:
        ctx.prec = prec
  834. def multiplicity(ctx, f, root, tol=None, maxsteps=10, **kwargs):
  835. """
  836. Return the multiplicity of a given root of f.
  837. Internally, numerical derivatives are used. This might be inefficient for
  838. higher order derviatives. Due to this, ``multiplicity`` cancels after
  839. evaluating 10 derivatives by default. You can be specify the n-th derivative
  840. using the dnf keyword.
  841. >>> from mpmath import *
  842. >>> multiplicity(lambda x: sin(x) - 1, pi/2)
  843. 2
  844. """
  845. if tol is None:
  846. tol = ctx.eps ** 0.8
  847. kwargs['d0f'] = f
  848. for i in xrange(maxsteps):
  849. dfstr = 'd' + str(i) + 'f'
  850. if dfstr in kwargs:
  851. df = kwargs[dfstr]
  852. else:
  853. df = lambda x: ctx.diff(f, x, i)
  854. if not abs(df(root)) < tol:
  855. break
  856. return i
  857. def steffensen(f):
  858. """
  859. linear convergent function -> quadratic convergent function
  860. Steffensen's method for quadratic convergence of a linear converging
  861. sequence.
  862. Don not use it for higher rates of convergence.
  863. It may even work for divergent sequences.
  864. Definition:
  865. F(x) = (x*f(f(x)) - f(x)**2) / (f(f(x)) - 2*f(x) + x)
  866. Example
  867. .......
  868. You can use Steffensen's method to accelerate a fixpoint iteration of linear
  869. (or less) convergence.
  870. x* is a fixpoint of the iteration x_{k+1} = phi(x_k) if x* = phi(x*). For
  871. phi(x) = x**2 there are two fixpoints: 0 and 1.
  872. Let's try Steffensen's method:
  873. >>> f = lambda x: x**2
  874. >>> from mpmath.calculus.optimization import steffensen
  875. >>> F = steffensen(f)
  876. >>> for x in [0.5, 0.9, 2.0]:
  877. ... fx = Fx = x
  878. ... for i in xrange(9):
  879. ... try:
  880. ... fx = f(fx)
  881. ... except OverflowError:
  882. ... pass
  883. ... try:
  884. ... Fx = F(Fx)
  885. ... except ZeroDivisionError:
  886. ... pass
  887. ... print('%20g %20g' % (fx, Fx))
  888. 0.25 -0.5
  889. 0.0625 0.1
  890. 0.00390625 -0.0011236
  891. 1.52588e-05 1.41691e-09
  892. 2.32831e-10 -2.84465e-27
  893. 5.42101e-20 2.30189e-80
  894. 2.93874e-39 -1.2197e-239
  895. 8.63617e-78 0
  896. 7.45834e-155 0
  897. 0.81 1.02676
  898. 0.6561 1.00134
  899. 0.430467 1
  900. 0.185302 1
  901. 0.0343368 1
  902. 0.00117902 1
  903. 1.39008e-06 1
  904. 1.93233e-12 1
  905. 3.73392e-24 1
  906. 4 1.6
  907. 16 1.2962
  908. 256 1.10194
  909. 65536 1.01659
  910. 4.29497e+09 1.00053
  911. 1.84467e+19 1
  912. 3.40282e+38 1
  913. 1.15792e+77 1
  914. 1.34078e+154 1
  915. Unmodified, the iteration converges only towards 0. Modified it converges
  916. not only much faster, it converges even to the repelling fixpoint 1.
  917. """
  918. def F(x):
  919. fx = f(x)
  920. ffx = f(fx)
  921. return (x*ffx - fx**2) / (ffx - 2*fx + x)
  922. return F
# Attach the module-level utilities as methods of the context mixin, so
# they are reachable as e.g. mp.findroot(...).
OptimizationMethods.jacobian = jacobian
OptimizationMethods.findroot = findroot
OptimizationMethods.multiplicity = multiplicity
if __name__ == '__main__':
    # run the doctests embedded in the docstrings above
    import doctest
    doctest.testmod()