PyPy 5.4.1 64bit (linux), HP (80) ~ 1.54s
La versión de 32 bits será un poco más lenta.
Utilizo cuatro métodos de factorización diferentes, con puntos de corte determinados empíricamente:
Intenté por un tiempo encontrar una ruptura limpia entre ECF y MPQS, pero no parece haber una. Sin embargo, si n contiene un factor pequeño, ECF generalmente lo encontrará casi de inmediato, por lo que opté por probar solo unas pocas curvas, antes de pasar a MPQS.
Actualmente, es solo ~ 2 veces más lento que Mathmatica, lo que sin duda considero un éxito.
home-prime.py
import math
import my_math
import mpqs
max_trial = 1e10
max_pollard = 1e22
def factor(n):
if n < max_trial:
return factor_trial(n)
for p in my_math.small_primes:
if n%p == 0:
return [p] + factor(n/p)
if my_math.is_prime(n):
return [n]
if n < max_pollard:
p = pollard_rho(n)
else:
p = lenstra_ecf(n) or mpqs.mpqs(n)
return factor(p) + factor(n/p)
def factor_trial(n):
a = []
for p in my_math.small_primes:
while n%p == 0:
a += [p]
n /= p
i = 211
while i*i < n:
for o in my_math.offsets:
i += o
while n%i == 0:
a += [i]
n /= i
if n > 1:
a += [n]
return a
def pollard_rho(n):
# Brent's variant
y, r, q = 0, 1, 1
c, m = 9, 40
g = 1
while g == 1:
x = y
for i in range(r):
y = (y*y + c) % n
k = 0
while k < r and g == 1:
ys = y
for j in range(min(m, r-k)):
y = (y*y + c) % n
q = q*abs(x-y) % n
g = my_math.gcd(q, n)
k += m
r *= 2
if g == n:
ys = (ys*ys + c) % n
g = gcd(n, abs(x-ys))
while g == 1:
ys = (ys*ys + c) % n
g = gcd(n, abs(x-ys))
return g
def ec_add((x1, z1), (x2, z2), (x0, z0), n):
t1, t2 = (x1-z1)*(x2+z2), (x1+z1)*(x2-z2)
x, z = t1+t2, t1-t2
return (z0*x*x % n, x0*z*z % n)
def ec_double((x, z), (a, b), n):
t1 = x+z; t1 *= t1
t2 = x-z; t2 *= t2
t3 = t1 - t2
t4 = 4*b*t2
return (t1*t4 % n, t3*(t4 + a*t3) % n)
def ec_multiply(k, p, C, n):
# Montgomery ladder algorithm
p0 = p
q, p = p, ec_double(p, C, n)
b = k >> 1
while b > (b & -b):
b ^= b & -b
while b:
if k&b:
q, p = ec_add(p, q, p0, n), ec_double(p, C, n)
else:
q, p = ec_double(q, C, n), ec_add(p, q, p0, n),
b >>= 1
return q
def lenstra_ecf(n, m = 5):
# Montgomery curves w/ Suyama parameterization.
# Based on pseudocode found in:
# "Implementing the Elliptic Curve Method of Factoring in Reconfigurable Hardware"
# Gaj, Kris et. al
# http://www.hyperelliptic.org/tanja/SHARCS/talks06/Gaj.pdf
# Phase 2 is not implemented.
B1, B2 = 8, 13
for i in range(m):
pg = my_math.primes()
p = pg.next()
k = 1
while p < B1:
k *= p**int(math.log(B1, p))
p = pg.next()
for s in range(B1, B2):
u, v = s*s-5, 4*s
x = u*u*u
z = v*v*v
t = pow(v-u, 3, n)
P = (x, z)
C = (t*(3*u+v) % n, 4*x*v % n)
Q = ec_multiply(k, P, C, n)
g = my_math.gcd(Q[1], n)
if 1 < g < n: return g
B1, B2 = B2, B1 + B2
if __name__ == '__main__':
import time
import sys
for n in sys.argv[1:]:
t0 = time.time()
i = int(n)
f = []
while len(f) != 1:
f = sorted(factor(i))
#print i, f
i = int(''.join(map(str, f)))
t1 = time.time()-t0
print n, i
print '%.3fs'%(t1)
print
Tiempos de muestra
$ pypy home-prime.py 8 16 20 64 65 80
8 3331113965338635107
0.005s
16 31636373
0.001s
20 3318308475676071413
0.004s
64 1272505013723
0.000s
65 1381321118321175157763339900357651
0.397s
80 313169138727147145210044974146858220729781791489
1.537s
El promedio de los 5 es de aproximadamente 0.39s.
Dependencias
mpqs.py
se toma directamente de mi respuesta a la factorización semiprime más rápida con algunas modificaciones muy menores.
mpqs.py
import math
import my_math
import time
# Multiple Polynomial Quadratic Sieve
def mpqs(n, verbose=False):
if verbose:
time1 = time.time()
root_n = my_math.isqrt(n)
root_2n = my_math.isqrt(n+n)
# formula chosen by experimentation
# seems to be close to optimal for n < 10^50
bound = int(5 * math.log(n, 10)**2)
prime = []
mod_root = []
log_p = []
num_prime = 0
# find a number of small primes for which n is a quadratic residue
p = 2
while p < bound or num_prime < 3:
# legendre (n|p) is only defined for odd p
if p > 2:
leg = my_math.legendre(n, p)
else:
leg = n & 1
if leg == 1:
prime += [p]
mod_root += [int(my_math.mod_sqrt(n, p))]
log_p += [math.log(p, 10)]
num_prime += 1
elif leg == 0:
if verbose:
print 'trial division found factors:'
print p, 'x', n/p
return p
p = my_math.next_prime(p)
# size of the sieve
x_max = bound*8
# maximum value on the sieved range
m_val = (x_max * root_2n) >> 1
# fudging the threshold down a bit makes it easier to find powers of primes as factors
# as well as partial-partial relationships, but it also makes the smoothness check slower.
# there's a happy medium somewhere, depending on how efficient the smoothness check is
thresh = math.log(m_val, 10) * 0.735
# skip small primes. they contribute very little to the log sum
# and add a lot of unnecessary entries to the table
# instead, fudge the threshold down a bit, assuming ~1/4 of them pass
min_prime = int(thresh*3)
fudge = sum(log_p[i] for i,p in enumerate(prime) if p < min_prime)/4
thresh -= fudge
sieve_primes = [p for p in prime if p >= min_prime]
sp_idx = prime.index(sieve_primes[0])
if verbose:
print 'smoothness bound:', bound
print 'sieve size:', x_max
print 'log threshold:', thresh
print 'skipping primes less than:', min_prime
smooth = []
used_prime = set()
partial = {}
num_smooth = 0
prev_num_smooth = 0
num_used_prime = 0
num_partial = 0
num_poly = 0
root_A = my_math.isqrt(root_2n / x_max)
if verbose:
print 'sieving for smooths...'
while True:
# find an integer value A such that:
# A is =~ sqrt(2*n) / x_max
# A is a perfect square
# sqrt(A) is prime, and n is a quadratic residue mod sqrt(A)
while True:
root_A = my_math.next_prime(root_A)
leg = my_math.legendre(n, root_A)
if leg == 1:
break
elif leg == 0:
if verbose:
print 'dumb luck found factors:'
print root_A, 'x', n/root_A
return root_A
A = root_A * root_A
# solve for an adequate B
# B*B is a quadratic residue mod n, such that B*B-A*C = n
# this is unsolvable if n is not a quadratic residue mod sqrt(A)
b = my_math.mod_sqrt(n, root_A)
B = (b + (n - b*b) * my_math.mod_inv(b + b, root_A))%A
# B*B-A*C = n <=> C = (B*B-n)/A
C = (B*B - n) / A
num_poly += 1
# sieve for prime factors
sums = [0.0]*(2*x_max)
i = sp_idx
for p in sieve_primes:
logp = log_p[i]
inv_A = my_math.mod_inv(A, p)
# modular root of the quadratic
a = int(((mod_root[i] - B) * inv_A)%p)
b = int(((p - mod_root[i] - B) * inv_A)%p)
amx = a+x_max
bmx = b+x_max
ax = amx-p
bx = bmx-p
k = p
while k < x_max:
sums[k+ax] += logp
sums[k+bx] += logp
sums[amx-k] += logp
sums[bmx-k] += logp
k += p
if k+ax < x_max:
sums[k+ax] += logp
if k+bx < x_max:
sums[k+bx] += logp
if amx-k > 0:
sums[amx-k] += logp
if bmx-k > 0:
sums[bmx-k] += logp
i += 1
# check for smooths
x = -x_max
for v in sums:
if v > thresh:
vec = set()
sqr = []
# because B*B-n = A*C
# (A*x+B)^2 - n = A*A*x*x+2*A*B*x + B*B - n
# = A*(A*x*x+2*B*x+C)
# gives the congruency
# (A*x+B)^2 = A*(A*x*x+2*B*x+C) (mod n)
# because A is chosen to be square, it doesn't need to be sieved
sieve_val = (A*x + B+B)*x + C
if sieve_val < 0:
vec = {-1}
sieve_val = -sieve_val
for p in prime:
while sieve_val%p == 0:
if p in vec:
# keep track of perfect square factors
# to avoid taking the sqrt of a gigantic number at the end
sqr += [p]
vec ^= {p}
sieve_val = int(sieve_val / p)
if sieve_val == 1:
# smooth
smooth += [(vec, (sqr, (A*x+B), root_A))]
used_prime |= vec
elif sieve_val in partial:
# combine two partials to make a (xor) smooth
# that is, every prime factor with an odd power is in our factor base
pair_vec, pair_vals = partial[sieve_val]
sqr += list(vec & pair_vec) + [sieve_val]
vec ^= pair_vec
smooth += [(vec, (sqr + pair_vals[0], (A*x+B)*pair_vals[1], root_A*pair_vals[2]))]
used_prime |= vec
num_partial += 1
else:
# save partial for later pairing
partial[sieve_val] = (vec, (sqr, A*x+B, root_A))
x += 1
prev_num_smooth = num_smooth
num_smooth = len(smooth)
num_used_prime = len(used_prime)
if verbose:
print 100 * num_smooth / num_prime, 'percent complete\r',
if num_smooth > num_used_prime and num_smooth > prev_num_smooth:
if verbose:
print '%d polynomials sieved (%d values)'%(num_poly, num_poly*x_max*2)
print 'found %d smooths (%d from partials) in %f seconds'%(num_smooth, num_partial, time.time()-time1)
print 'solving for non-trivial congruencies...'
used_prime_list = sorted(list(used_prime))
# set up bit fields for gaussian elimination
masks = []
mask = 1
bit_fields = [0]*num_used_prime
for vec, vals in smooth:
masks += [mask]
i = 0
for p in used_prime_list:
if p in vec: bit_fields[i] |= mask
i += 1
mask <<= 1
# row echelon form
col_offset = 0
null_cols = []
for col in xrange(num_smooth):
pivot = col-col_offset == num_used_prime or bit_fields[col-col_offset] & masks[col] == 0
for row in xrange(col+1-col_offset, num_used_prime):
if bit_fields[row] & masks[col]:
if pivot:
bit_fields[col-col_offset], bit_fields[row] = bit_fields[row], bit_fields[col-col_offset]
pivot = False
else:
bit_fields[row] ^= bit_fields[col-col_offset]
if pivot:
null_cols += [col]
col_offset += 1
# reduced row echelon form
for row in xrange(num_used_prime):
# lowest set bit
mask = bit_fields[row] & -bit_fields[row]
for up_row in xrange(row):
if bit_fields[up_row] & mask:
bit_fields[up_row] ^= bit_fields[row]
# check for non-trivial congruencies
for col in null_cols:
all_vec, (lh, rh, rA) = smooth[col]
lhs = lh # sieved values (left hand side)
rhs = [rh] # sieved values - n (right hand side)
rAs = [rA] # root_As (cofactor of lhs)
i = 0
for field in bit_fields:
if field & masks[col]:
vec, (lh, rh, rA) = smooth[i]
lhs += list(all_vec & vec) + lh
all_vec ^= vec
rhs += [rh]
rAs += [rA]
i += 1
factor = my_math.gcd(my_math.list_prod(rAs)*my_math.list_prod(lhs) - my_math.list_prod(rhs), n)
if 1 < factor < n:
break
else:
if verbose:
print 'none found.'
continue
break
if verbose:
print 'factors found:'
print factor, 'x', n/factor
print 'time elapsed: %f seconds'%(time.time()-time1)
return factor
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description='Uses a MPQS to factor a composite number')
parser.add_argument('composite', metavar='number_to_factor', type=long, help='the composite number to factor')
parser.add_argument('--verbose', dest='verbose', action='store_true', help="enable verbose output")
args = parser.parse_args()
if args.verbose:
mpqs(args.composite, args.verbose)
else:
time1 = time.time()
print mpqs(args.composite)
print 'time elapsed: %f seconds'%(time.time()-time1)
my_math.py
está tomado de la misma publicación que mpqs.py
, sin embargo, también he agregado el generador de números primos ilimitados que utilicé en mi respuesta para encontrar la brecha más grande entre los números primos buenos .
my_math.py
# primes less than 212
small_primes = [
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37,
41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89,
97,101,103,107,109,113,127,131,137,139,149,151,
157,163,167,173,179,181,191,193,197,199,211]
# pre-calced sieve of eratosthenes for n = 2, 3, 5, 7
indices = [
1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
53, 59, 61, 67, 71, 73, 79, 83, 89, 97,101,103,
107,109,113,121,127,131,137,139,143,149,151,157,
163,167,169,173,179,181,187,191,193,197,199,209]
# distances between sieve values
offsets = [
10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6,
6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4,
2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6, 6,
4, 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2]
# tabulated, mod 105
dindices =[
0,10, 2, 0, 4, 0, 0, 0, 8, 0, 0, 2, 0, 4, 0,
0, 6, 2, 0, 4, 0, 0, 4, 6, 0, 0, 6, 0, 0, 2,
0, 6, 2, 0, 4, 0, 0, 4, 6, 0, 0, 2, 0, 4, 2,
0, 6, 6, 0, 0, 0, 0, 6, 6, 0, 0, 0, 0, 4, 2,
0, 6, 2, 0, 4, 0, 0, 4, 6, 0, 0, 2, 0, 6, 2,
0, 6, 0, 0, 4, 0, 0, 4, 6, 0, 0, 2, 0, 4, 8,
0, 0, 2, 0,10, 0, 0, 4, 0, 0, 0, 2, 0, 4, 2]
max_int = 2147483647
# returns the index of x in a sorted list a
# or the index of the next larger item if x is not present
# i.e. the proper insertion point for x in a
def binary_search(a, x):
s = 0
e = len(a)
m = e >> 1
while m != e:
if a[m] < x:
s = m
m = (s + e + 1) >> 1
else:
e = m
m = (s + e) >> 1
return m
# divide and conquer list product
def list_prod(a):
size = len(a)
if size == 1:
return a[0]
return list_prod(a[:size>>1]) * list_prod(a[size>>1:])
# greatest common divisor of a and b
def gcd(a, b):
while b:
a, b = b, a%b
return a
# extended gcd
def ext_gcd(a, m):
a = int(a%m)
x, u = 0, 1
while a:
x, u = u, x - (m/a)*u
m, a = a, m%a
return (m, x, u)
# legendre symbol (a|m)
# note: returns m-1 if a is a non-residue, instead of -1
def legendre(a, m):
return pow(a, (m-1) >> 1, m)
# modular inverse of a mod m
def mod_inv(a, m):
return ext_gcd(a, m)[1]
# modular sqrt(n) mod p
# p must be prime
def mod_sqrt(n, p):
a = n%p
if p%4 == 3:
return pow(a, (p+1) >> 2, p)
elif p%8 == 5:
v = pow(a << 1, (p-5) >> 3, p)
i = ((a*v*v << 1) % p) - 1
return (a*v*i)%p
elif p%8 == 1:
# Shank's method
q = p-1
e = 0
while q&1 == 0:
e += 1
q >>= 1
n = 2
while legendre(n, p) != p-1:
n += 1
w = pow(a, q, p)
x = pow(a, (q+1) >> 1, p)
y = pow(n, q, p)
r = e
while True:
if w == 1:
return x
v = w
k = 0
while v != 1 and k+1 < r:
v = (v*v)%p
k += 1
if k == 0:
return x
d = pow(y, 1 << (r-k-1), p)
x = (x*d)%p
y = (d*d)%p
w = (w*y)%p
r = k
else: # p == 2
return a
#integer sqrt of n
def isqrt(n):
c = n*4/3
d = c.bit_length()
a = d>>1
if d&1:
x = 1 << a
y = (x + (n >> a)) >> 1
else:
x = (3 << a) >> 2
y = (x + (c >> a)) >> 1
if x != y:
x = y
y = (x + n/x) >> 1
while y < x:
x = y
y = (x + n/x) >> 1
return x
# integer cbrt of n
def icbrt(n):
d = n.bit_length()
if d%3 == 2:
x = 3 << d/3-1
else:
x = 1 << d/3
y = (2*x + n/(x*x))/3
if x != y:
x = y
y = (2*x + n/(x*x))/3
while y < x:
x = y
y = (2*x + n/(x*x))/3
return x
# strong probable prime
def is_sprp(n, b=2):
if n < 2: return False
d = n-1
s = 0
while d&1 == 0:
s += 1
d >>= 1
x = pow(b, d, n)
if x == 1 or x == n-1:
return True
for r in xrange(1, s):
x = (x * x)%n
if x == 1:
return False
elif x == n-1:
return True
return False
# lucas probable prime
# assumes D = 1 (mod 4), (D|n) = -1
def is_lucas_prp(n, D):
P = 1
Q = (1-D) >> 2
# n+1 = 2**r*s where s is odd
s = n+1
r = 0
while s&1 == 0:
r += 1
s >>= 1
# calculate the bit reversal of (odd) s
# e.g. 19 (10011) <=> 25 (11001)
t = 0
while s:
if s&1:
t += 1
s -= 1
else:
t <<= 1
s >>= 1
# use the same bit reversal process to calculate the sth Lucas number
# keep track of q = Q**n as we go
U = 0
V = 2
q = 1
# mod_inv(2, n)
inv_2 = (n+1) >> 1
while t:
if t&1:
# U, V of n+1
U, V = ((U + V) * inv_2)%n, ((D*U + V) * inv_2)%n
q = (q * Q)%n
t -= 1
else:
# U, V of n*2
U, V = (U * V)%n, (V * V - 2 * q)%n
q = (q * q)%n
t >>= 1
# double s until we have the 2**r*sth Lucas number
while r:
U, V = (U * V)%n, (V * V - 2 * q)%n
q = (q * q)%n
r -= 1
# primality check
# if n is prime, n divides the n+1st Lucas number, given the assumptions
return U == 0
## Baillie-PSW ##
# this is technically a probabalistic test, but there are no known pseudoprimes
def is_bpsw(n):
if not is_sprp(n, 2): return False
# idea shamelessly stolen from Mathmatica's PrimeQ
# if n is a 2-sprp and a 3-sprp, n is necessarily square-free
if not is_sprp(n, 3): return False
a = 5
s = 2
# if n is a perfect square, this will never terminate
while legendre(a, n) != n-1:
s = -s
a = s-a
return is_lucas_prp(n, a)
# an 'almost certain' primality check
def is_prime(n):
if n < 212:
m = binary_search(small_primes, n)
return n == small_primes[m]
for p in small_primes:
if n%p == 0:
return False
# if n is a 32-bit integer, perform full trial division
if n <= max_int:
p = 211
while p*p < n:
for o in offsets:
p += o
if n%p == 0:
return False
return True
return is_bpsw(n)
# next prime strictly larger than n
def next_prime(n):
if n < 2:
return 2
# first odd larger than n
n = (n + 1) | 1
if n < 212:
m = binary_search(small_primes, n)
return small_primes[m]
# find our position in the sieve rotation via binary search
x = int(n%210)
m = binary_search(indices, x)
i = int(n + (indices[m] - x))
# adjust offsets
offs = offsets[m:] + offsets[:m]
while True:
for o in offs:
if is_prime(i):
return i
i += o
# an infinite prime number generator
def primes(start = 0):
for n in small_primes[start:]: yield n
pg = primes(6)
p = pg.next()
q = p*p
sieve = {221: 13, 253: 11}
n = 211
while True:
for o in offsets:
n += o
stp = sieve.pop(n, 0)
if stp:
nxt = n/stp
nxt += dindices[nxt%105]
while nxt*stp in sieve:
nxt += dindices[nxt%105]
sieve[nxt*stp] = stp
elif n < q:
yield n
else:
sieve[q + dindices[p%105]*p] = p
p = pg.next()
q = p*p
# true if n is a prime power > 0
def is_prime_power(n):
if n > 1:
for p in small_primes:
if n%p == 0:
n /= p
while n%p == 0: n /= p
return n == 1
r = isqrt(n)
if r*r == n:
return is_prime_power(r)
s = icbrt(n)
if s*s*s == n:
return is_prime_power(s)
p = 211
while p*p < r:
for o in offsets:
p += o
if n%p == 0:
n /= p
while n%p == 0: n /= p
return n == 1
if n <= max_int:
while p*p < n:
for o in offsets:
p += o
if n%p == 0:
return False
return True
return is_bpsw(n)
return False