qs.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. from sympy.core.numbers import igcd, mod_inverse
  2. from sympy.core.power import integer_nthroot
  3. from sympy.ntheory.residue_ntheory import _sqrt_mod_prime_power
  4. from sympy.ntheory import isprime
  5. from math import log, sqrt
  6. import random
  7. rgen = random.Random()
  8. class SievePolynomial:
  9. def __init__(self, modified_coeff=(), a=None, b=None):
  10. """This class denotes the seive polynomial.
  11. If ``g(x) = (a*x + b)**2 - N``. `g(x)` can be expanded
  12. to ``a*x**2 + 2*a*b*x + b**2 - N``, so the coefficient
  13. is stored in the form `[a**2, 2*a*b, b**2 - N]`. This
  14. ensures faster `eval` method because we dont have to
  15. perform `a**2, 2*a*b, b**2` every time we call the
  16. `eval` method. As multiplication is more expensive
  17. than addition, by using modified_coefficient we get
  18. a faster seiving process.
  19. Parameters
  20. ==========
  21. modified_coeff : modified_coefficient of sieve polynomial
  22. a : parameter of the sieve polynomial
  23. b : parameter of the sieve polynomial
  24. """
  25. self.modified_coeff = modified_coeff
  26. self.a = a
  27. self.b = b
  28. def eval(self, x):
  29. """
  30. Compute the value of the sieve polynomial at point x.
  31. Parameters
  32. ==========
  33. x : Integer parameter for sieve polynomial
  34. """
  35. ans = 0
  36. for coeff in self.modified_coeff:
  37. ans *= x
  38. ans += coeff
  39. return ans
  40. class FactorBaseElem:
  41. """This class stores an element of the `factor_base`.
  42. """
  43. def __init__(self, prime, tmem_p, log_p):
  44. """
  45. Initialization of factor_base_elem.
  46. Parameters
  47. ==========
  48. prime : prime number of the factor_base
  49. tmem_p : Integer square root of x**2 = n mod prime
  50. log_p : Compute Natural Logarithm of the prime
  51. """
  52. self.prime = prime
  53. self.tmem_p = tmem_p
  54. self.log_p = log_p
  55. self.soln1 = None
  56. self.soln2 = None
  57. self.a_inv = None
  58. self.b_ainv = None
  59. def _generate_factor_base(prime_bound, n):
  60. """Generate `factor_base` for Quadratic Sieve. The `factor_base`
  61. consists of all the points whose ``legendre_symbol(n, p) == 1``
  62. and ``p < num_primes``. Along with the prime `factor_base` also stores
  63. natural logarithm of prime and the residue n modulo p.
  64. It also returns the of primes numbers in the `factor_base` which are
  65. close to 1000 and 5000.
  66. Parameters
  67. ==========
  68. prime_bound : upper prime bound of the factor_base
  69. n : integer to be factored
  70. """
  71. from sympy.ntheory.generate import sieve
  72. factor_base = []
  73. idx_1000, idx_5000 = None, None
  74. for prime in sieve.primerange(1, prime_bound):
  75. if pow(n, (prime - 1) // 2, prime) == 1:
  76. if prime > 1000 and idx_1000 is None:
  77. idx_1000 = len(factor_base) - 1
  78. if prime > 5000 and idx_5000 is None:
  79. idx_5000 = len(factor_base) - 1
  80. residue = _sqrt_mod_prime_power(n, prime, 1)[0]
  81. log_p = round(log(prime)*2**10)
  82. factor_base.append(FactorBaseElem(prime, residue, log_p))
  83. return idx_1000, idx_5000, factor_base
  84. def _initialize_first_polynomial(N, M, factor_base, idx_1000, idx_5000, seed=None):
  85. """This step is the initialization of the 1st sieve polynomial.
  86. Here `a` is selected as a product of several primes of the factor_base
  87. such that `a` is about to ``sqrt(2*N) / M``. Other initial values of
  88. factor_base elem are also intialized which includes a_inv, b_ainv, soln1,
  89. soln2 which are used when the sieve polynomial is changed. The b_ainv
  90. is required for fast polynomial change as we do not have to calculate
  91. `2*b*mod_inverse(a, prime)` every time.
  92. We also ensure that the `factor_base` primes which make `a` are between
  93. 1000 and 5000.
  94. Parameters
  95. ==========
  96. N : Number to be factored
  97. M : sieve interval
  98. factor_base : factor_base primes
  99. idx_1000 : index of prime numbe in the factor_base near 1000
  100. idx_5000 : index of primenumber in the factor_base near to 5000
  101. seed : Generate pseudoprime numbers
  102. """
  103. if seed is not None:
  104. rgen.seed(seed)
  105. approx_val = sqrt(2*N) / M
  106. # `a` is a parameter of the sieve polynomial and `q` is the prime factors of `a`
  107. # randomly search for a combination of primes whose multiplication is close to approx_val
  108. # This multiplication of primes will be `a` and the primes will be `q`
  109. # `best_a` denotes that `a` is close to approx_val in the random search of combination
  110. best_a, best_q, best_ratio = None, None, None
  111. start = 0 if idx_1000 is None else idx_1000
  112. end = len(factor_base) - 1 if idx_5000 is None else idx_5000
  113. for _ in range(50):
  114. a = 1
  115. q = []
  116. while(a < approx_val):
  117. rand_p = 0
  118. while(rand_p == 0 or rand_p in q):
  119. rand_p = rgen.randint(start, end)
  120. p = factor_base[rand_p].prime
  121. a *= p
  122. q.append(rand_p)
  123. ratio = a / approx_val
  124. if best_ratio is None or abs(ratio - 1) < abs(best_ratio - 1):
  125. best_q = q
  126. best_a = a
  127. best_ratio = ratio
  128. a = best_a
  129. q = best_q
  130. B = []
  131. for idx, val in enumerate(q):
  132. q_l = factor_base[val].prime
  133. gamma = factor_base[val].tmem_p * mod_inverse(a // q_l, q_l) % q_l
  134. if gamma > q_l / 2:
  135. gamma = q_l - gamma
  136. B.append(a//q_l*gamma)
  137. b = sum(B)
  138. g = SievePolynomial([a*a, 2*a*b, b*b - N], a, b)
  139. for fb in factor_base:
  140. if a % fb.prime == 0:
  141. continue
  142. fb.a_inv = mod_inverse(a, fb.prime)
  143. fb.b_ainv = [2*b_elem*fb.a_inv % fb.prime for b_elem in B]
  144. fb.soln1 = (fb.a_inv*(fb.tmem_p - b)) % fb.prime
  145. fb.soln2 = (fb.a_inv*(-fb.tmem_p - b)) % fb.prime
  146. return g, B
  147. def _initialize_ith_poly(N, factor_base, i, g, B):
  148. """Initialization stage of ith poly. After we finish sieving 1`st polynomial
  149. here we quickly change to the next polynomial from which we will again
  150. start sieving. Suppose we generated ith sieve polynomial and now we
  151. want to generate (i + 1)th polynomial, where ``1 <= i <= 2**(j - 1) - 1``
  152. where `j` is the number of prime factors of the coefficient `a`
  153. then this function can be used to go to the next polynomial. If
  154. ``i = 2**(j - 1) - 1`` then go to _initialize_first_polynomial stage.
  155. Parameters
  156. ==========
  157. N : number to be factored
  158. factor_base : factor_base primes
  159. i : integer denoting ith polynomial
  160. g : (i - 1)th polynomial
  161. B : array that stores a//q_l*gamma
  162. """
  163. from sympy.functions.elementary.integers import ceiling
  164. v = 1
  165. j = i
  166. while(j % 2 == 0):
  167. v += 1
  168. j //= 2
  169. if ceiling(i / (2**v)) % 2 == 1:
  170. neg_pow = -1
  171. else:
  172. neg_pow = 1
  173. b = g.b + 2*neg_pow*B[v - 1]
  174. a = g.a
  175. g = SievePolynomial([a*a, 2*a*b, b*b - N], a, b)
  176. for fb in factor_base:
  177. if a % fb.prime == 0:
  178. continue
  179. fb.soln1 = (fb.soln1 - neg_pow*fb.b_ainv[v - 1]) % fb.prime
  180. fb.soln2 = (fb.soln2 - neg_pow*fb.b_ainv[v - 1]) % fb.prime
  181. return g
  182. def _gen_sieve_array(M, factor_base):
  183. """Sieve Stage of the Quadratic Sieve. For every prime in the factor_base
  184. that doesn't divide the coefficient `a` we add log_p over the sieve_array
  185. such that ``-M <= soln1 + i*p <= M`` and ``-M <= soln2 + i*p <= M`` where `i`
  186. is an integer. When p = 2 then log_p is only added using
  187. ``-M <= soln1 + i*p <= M``.
  188. Parameters
  189. ==========
  190. M : sieve interval
  191. factor_base : factor_base primes
  192. """
  193. sieve_array = [0]*(2*M + 1)
  194. for factor in factor_base:
  195. if factor.soln1 is None: #The prime does not divides a
  196. continue
  197. for idx in range((M + factor.soln1) % factor.prime, 2*M, factor.prime):
  198. sieve_array[idx] += factor.log_p
  199. if factor.prime == 2:
  200. continue
  201. #if prime is 2 then sieve only with soln_1_p
  202. for idx in range((M + factor.soln2) % factor.prime, 2*M, factor.prime):
  203. sieve_array[idx] += factor.log_p
  204. return sieve_array
  205. def _check_smoothness(num, factor_base):
  206. """Here we check that if `num` is a smooth number or not. If `a` is a smooth
  207. number then it returns a vector of prime exponents modulo 2. For example
  208. if a = 2 * 5**2 * 7**3 and the factor base contains {2, 3, 5, 7} then
  209. `a` is a smooth number and this function returns ([1, 0, 0, 1], True). If
  210. `a` is a partial relation which means that `a` a has one prime factor
  211. greater than the `factor_base` then it returns `(a, False)` which denotes `a`
  212. is a partial relation.
  213. Parameters
  214. ==========
  215. a : integer whose smootheness is to be checked
  216. factor_base : factor_base primes
  217. """
  218. vec = []
  219. if num < 0:
  220. vec.append(1)
  221. num *= -1
  222. else:
  223. vec.append(0)
  224. #-1 is not included in factor_base add -1 in vector
  225. for factor in factor_base:
  226. if num % factor.prime != 0:
  227. vec.append(0)
  228. continue
  229. factor_exp = 0
  230. while num % factor.prime == 0:
  231. factor_exp += 1
  232. num //= factor.prime
  233. vec.append(factor_exp % 2)
  234. if num == 1:
  235. return vec, True
  236. if isprime(num):
  237. return num, False
  238. return None, None
  239. def _trial_division_stage(N, M, factor_base, sieve_array, sieve_poly, partial_relations, ERROR_TERM):
  240. """Trial division stage. Here we trial divide the values generetated
  241. by sieve_poly in the sieve interval and if it is a smooth number then
  242. it is stored in `smooth_relations`. Moreover, if we find two partial relations
  243. with same large prime then they are combined to form a smooth relation.
  244. First we iterate over sieve array and look for values which are greater
  245. than accumulated_val, as these values have a high chance of being smooth
  246. number. Then using these values we find smooth relations.
  247. In general, let ``t**2 = u*p modN`` and ``r**2 = v*p modN`` be two partial relations
  248. with the same large prime p. Then they can be combined ``(t*r/p)**2 = u*v modN``
  249. to form a smooth relation.
  250. Parameters
  251. ==========
  252. N : Number to be factored
  253. M : sieve interval
  254. factor_base : factor_base primes
  255. sieve_array : stores log_p values
  256. sieve_poly : polynomial from which we find smooth relations
  257. partial_relations : stores partial relations with one large prime
  258. ERROR_TERM : error term for accumulated_val
  259. """
  260. sqrt_n = sqrt(float(N))
  261. accumulated_val = log(M * sqrt_n)*2**10 - ERROR_TERM
  262. smooth_relations = []
  263. proper_factor = set()
  264. partial_relation_upper_bound = 128*factor_base[-1].prime
  265. for idx, val in enumerate(sieve_array):
  266. if val < accumulated_val:
  267. continue
  268. x = idx - M
  269. v = sieve_poly.eval(x)
  270. vec, is_smooth = _check_smoothness(v, factor_base)
  271. if is_smooth is None:#Neither smooth nor partial
  272. continue
  273. u = sieve_poly.a*x + sieve_poly.b
  274. # Update the partial relation
  275. # If 2 partial relation with same large prime is found then generate smooth relation
  276. if is_smooth is False:#partial relation found
  277. large_prime = vec
  278. #Consider the large_primes under 128*F
  279. if large_prime > partial_relation_upper_bound:
  280. continue
  281. if large_prime not in partial_relations:
  282. partial_relations[large_prime] = (u, v)
  283. continue
  284. else:
  285. u_prev, v_prev = partial_relations[large_prime]
  286. partial_relations.pop(large_prime)
  287. try:
  288. large_prime_inv = mod_inverse(large_prime, N)
  289. except ValueError:#if large_prine divides N
  290. proper_factor.add(large_prime)
  291. continue
  292. u = u*u_prev*large_prime_inv
  293. v = v*v_prev // (large_prime*large_prime)
  294. vec, is_smooth = _check_smoothness(v, factor_base)
  295. #assert u*u % N == v % N
  296. smooth_relations.append((u, v, vec))
  297. return smooth_relations, proper_factor
  298. #LINEAR ALGEBRA STAGE
  299. def _build_matrix(smooth_relations):
  300. """Build a 2D matrix from smooth relations.
  301. Parameters
  302. ==========
  303. smooth_relations : Stores smooth relations
  304. """
  305. matrix = []
  306. for s_relation in smooth_relations:
  307. matrix.append(s_relation[2])
  308. return matrix
  309. def _gauss_mod_2(A):
  310. """Fast gaussian reduction for modulo 2 matrix.
  311. Parameters
  312. ==========
  313. A : Matrix
  314. Examples
  315. ========
  316. >>> from sympy.ntheory.qs import _gauss_mod_2
  317. >>> _gauss_mod_2([[0, 1, 1], [1, 0, 1], [0, 1, 0], [1, 1, 1]])
  318. ([[[1, 0, 1], 3]],
  319. [True, True, True, False],
  320. [[0, 1, 0], [1, 0, 0], [0, 0, 1], [1, 0, 1]])
  321. Reference
  322. ==========
  323. .. [1] A fast algorithm for gaussian elimination over GF(2) and
  324. its implementation on the GAPP. Cetin K.Koc, Sarath N.Arachchige"""
  325. import copy
  326. matrix = copy.deepcopy(A)
  327. row = len(matrix)
  328. col = len(matrix[0])
  329. mark = [False]*row
  330. for c in range(col):
  331. for r in range(row):
  332. if matrix[r][c] == 1:
  333. break
  334. mark[r] = True
  335. for c1 in range(col):
  336. if c1 == c:
  337. continue
  338. if matrix[r][c1] == 1:
  339. for r2 in range(row):
  340. matrix[r2][c1] = (matrix[r2][c1] + matrix[r2][c]) % 2
  341. dependent_row = []
  342. for idx, val in enumerate(mark):
  343. if val == False:
  344. dependent_row.append([matrix[idx], idx])
  345. return dependent_row, mark, matrix
  346. def _find_factor(dependent_rows, mark, gauss_matrix, index, smooth_relations, N):
  347. """Finds proper factor of N. Here, transform the dependent rows as a
  348. combination of independent rows of the gauss_matrix to form the desired
  349. relation of the form ``X**2 = Y**2 modN``. After obtaining the desired relation
  350. we obtain a proper factor of N by `gcd(X - Y, N)`.
  351. Parameters
  352. ==========
  353. dependent_rows : denoted dependent rows in the reduced matrix form
  354. mark : boolean array to denoted dependent and independent rows
  355. gauss_matrix : Reduced form of the smooth relations matrix
  356. index : denoted the index of the dependent_rows
  357. smooth_relations : Smooth relations vectors matrix
  358. N : Number to be factored
  359. """
  360. idx_in_smooth = dependent_rows[index][1]
  361. independent_u = [smooth_relations[idx_in_smooth][0]]
  362. independent_v = [smooth_relations[idx_in_smooth][1]]
  363. dept_row = dependent_rows[index][0]
  364. for idx, val in enumerate(dept_row):
  365. if val == 1:
  366. for row in range(len(gauss_matrix)):
  367. if gauss_matrix[row][idx] == 1 and mark[row] == True:
  368. independent_u.append(smooth_relations[row][0])
  369. independent_v.append(smooth_relations[row][1])
  370. break
  371. u = 1
  372. v = 1
  373. for i in independent_u:
  374. u *= i
  375. for i in independent_v:
  376. v *= i
  377. #assert u**2 % N == v % N
  378. v = integer_nthroot(v, 2)[0]
  379. return igcd(u - v, N)
  380. def qs(N, prime_bound, M, ERROR_TERM=25, seed=1234):
  381. """Performs factorization using Self-Initializing Quadratic Sieve.
  382. In SIQS, let N be a number to be factored, and this N should not be a
  383. perfect power. If we find two integers such that ``X**2 = Y**2 modN`` and
  384. ``X != +-Y modN``, then `gcd(X + Y, N)` will reveal a proper factor of N.
  385. In order to find these integers X and Y we try to find relations of form
  386. t**2 = u modN where u is a product of small primes. If we have enough of
  387. these relations then we can form ``(t1*t2...ti)**2 = u1*u2...ui modN`` such that
  388. the right hand side is a square, thus we found a relation of ``X**2 = Y**2 modN``.
  389. Here, several optimizations are done like using muliple polynomials for
  390. sieving, fast changing between polynomials and using partial relations.
  391. The use of partial relations can speeds up the factoring by 2 times.
  392. Parameters
  393. ==========
  394. N : Number to be Factored
  395. prime_bound : upper bound for primes in the factor base
  396. M : Sieve Interval
  397. ERROR_TERM : Error term for checking smoothness
  398. threshold : Extra smooth relations for factorization
  399. seed : generate pseudo prime numbers
  400. Examples
  401. ========
  402. >>> from sympy.ntheory import qs
  403. >>> qs(25645121643901801, 2000, 10000)
  404. {5394769, 4753701529}
  405. >>> qs(9804659461513846513, 2000, 10000)
  406. {4641991, 2112166839943}
  407. References
  408. ==========
  409. .. [1] https://pdfs.semanticscholar.org/5c52/8a975c1405bd35c65993abf5a4edb667c1db.pdf
  410. .. [2] https://www.rieselprime.de/ziki/Self-initializing_quadratic_sieve
  411. """
  412. ERROR_TERM*=2**10
  413. rgen.seed(seed)
  414. idx_1000, idx_5000, factor_base = _generate_factor_base(prime_bound, N)
  415. smooth_relations = []
  416. ith_poly = 0
  417. partial_relations = {}
  418. proper_factor = set()
  419. threshold = 5*len(factor_base) // 100
  420. while True:
  421. if ith_poly == 0:
  422. ith_sieve_poly, B_array = _initialize_first_polynomial(N, M, factor_base, idx_1000, idx_5000)
  423. else:
  424. ith_sieve_poly = _initialize_ith_poly(N, factor_base, ith_poly, ith_sieve_poly, B_array)
  425. ith_poly += 1
  426. if ith_poly >= 2**(len(B_array) - 1): # time to start with a new sieve polynomial
  427. ith_poly = 0
  428. sieve_array = _gen_sieve_array(M, factor_base)
  429. s_rel, p_f = _trial_division_stage(N, M, factor_base, sieve_array, ith_sieve_poly, partial_relations, ERROR_TERM)
  430. smooth_relations += s_rel
  431. proper_factor |= p_f
  432. if len(smooth_relations) >= len(factor_base) + threshold:
  433. break
  434. matrix = _build_matrix(smooth_relations)
  435. dependent_row, mark, gauss_matrix = _gauss_mod_2(matrix)
  436. N_copy = N
  437. for index in range(len(dependent_row)):
  438. factor = _find_factor(dependent_row, mark, gauss_matrix, index, smooth_relations, N)
  439. if factor > 1 and factor < N:
  440. proper_factor.add(factor)
  441. while(N_copy % factor == 0):
  442. N_copy //= factor
  443. if isprime(N_copy):
  444. proper_factor.add(N_copy)
  445. break
  446. if(N_copy == 1):
  447. break
  448. return proper_factor