最快的方式列出N以下的所有素数
这是我能想到的最好的算法。
def get_primes(n):
numbers = set(range(n, 1, -1))
primes = []
while numbers:
p = numbers.pop()
primes.append(p)
numbers.difference_update(set(range(p*2, n+1, p)))
return primes
>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import get_primes').timeit(1)
1.1499958793645562
它可以做得更快吗?
这个代码有一个缺陷:因为numbers
是一个无序集合,所以不能保证numbers.pop()
会从集合中删除最小的数字。 尽管如此,它对某些输入数字起作用(至少对我而言):
>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True
警告:由于硬件或Python版本的差异, timeit
结果可能会有所不同。
下面是一个比较大量实现的脚本:
非常感谢stephan将sieve_wheel_30引起我的注意。 值得一提的是罗伯特威廉汉克斯的primesfrom2to,primesfrom3to,rwh_primes,rwh_primes1和rwh_primes2。
在使用psyco测试的普通Python方法中,对于n = 1000000, rwh_primes1是最快的测试。
+---------------------+-------+
| Method | ms |
+---------------------+-------+
| rwh_primes1 | 43.0 |
| sieveOfAtkin | 46.4 |
| rwh_primes | 57.4 |
| sieve_wheel_30 | 63.0 |
| rwh_primes2 | 67.8 |
| sieveOfEratosthenes | 147.0 |
| ambi_sieve_plain | 152.0 |
| sundaram3 | 194.0 |
+---------------------+-------+
在测试的普通Python方法中, 如果没有psyco ,对于n = 1000000, rwh_primes2是最快的。
+---------------------+-------+
| Method | ms |
+---------------------+-------+
| rwh_primes2 | 68.1 |
| rwh_primes1 | 93.7 |
| rwh_primes | 94.6 |
| sieve_wheel_30 | 97.4 |
| sieveOfEratosthenes | 178.0 |
| ambi_sieve_plain | 286.0 |
| sieveOfAtkin | 314.0 |
| sundaram3 | 416.0 |
+---------------------+-------+
在所有测试的方法中,允许numpy,n = 1000000, primesfrom2to是最快的测试。
+---------------------+-------+
| Method | ms |
+---------------------+-------+
| primesfrom2to | 15.9 |
| primesfrom3to | 18.4 |
| ambi_sieve | 29.3 |
+---------------------+-------+
时间使用命令测量:
python -mtimeit -s"import primes" "primes.{method}(1000000)"
用{method}
替换为每个方法名称。
primes.py:
#!/usr/bin/env python
import psyco; psyco.full()
from math import sqrt, ceil
import numpy as np
def rwh_primes(n):
# https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Returns a list of primes < n """
sieve = [True] * n
for i in xrange(3,int(n**0.5)+1,2):
if sieve[i]:
sieve[i*i::2*i]=[False]*((n-i*i-1)/(2*i)+1)
return [2] + [i for i in xrange(3,n,2) if sieve[i]]
def rwh_primes1(n):
# https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Returns a list of primes < n """
sieve = [True] * (n/2)
for i in xrange(3,int(n**0.5)+1,2):
if sieve[i/2]:
sieve[i*i/2::i] = [False] * ((n-i*i-1)/(2*i)+1)
return [2] + [2*i+1 for i in xrange(1,n/2) if sieve[i]]
def rwh_primes2(n):
# https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Input n>=6, Returns a list of primes, 2 <= p < n """
correction = (n%6>1)
n = {0:n,1:n-1,2:n+4,3:n+3,4:n+2,5:n+1}[n%6]
sieve = [True] * (n/3)
sieve[0] = False
for i in xrange(int(n**0.5)/3+1):
if sieve[i]:
k=3*i+1|1
sieve[ ((k*k)/3) ::2*k]=[False]*((n/6-(k*k)/6-1)/k+1)
sieve[(k*k+4*k-2*k*(i&1))/3::2*k]=[False]*((n/6-(k*k+4*k-2*k*(i&1))/6-1)/k+1)
return [2,3] + [3*i+1|1 for i in xrange(1,n/3-correction) if sieve[i]]
def sieve_wheel_30(N):
# http://zerovolt.com/?p=88
''' Returns a list of primes <= N using wheel criterion 2*3*5 = 30
Copyright 2009 by zerovolt.com
This code is free for non-commercial purposes, in which case you can just leave this comment as a credit for my work.
If you need this code for commercial purposes, please contact me by sending an email to: info [at] zerovolt [dot] com.'''
__smallp = ( 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139,
149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311,
313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401,
409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491,
499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683,
691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797,
809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887,
907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997)
wheel = (2, 3, 5)
const = 30
if N < 2:
return []
if N <= const:
pos = 0
while __smallp[pos] <= N:
pos += 1
return list(__smallp[:pos])
# make the offsets list
offsets = (7, 11, 13, 17, 19, 23, 29, 1)
# prepare the list
p = [2, 3, 5]
dim = 2 + N // const
tk1 = [True] * dim
tk7 = [True] * dim
tk11 = [True] * dim
tk13 = [True] * dim
tk17 = [True] * dim
tk19 = [True] * dim
tk23 = [True] * dim
tk29 = [True] * dim
tk1[0] = False
# help dictionary d
# d[a , b] = c ==> if I want to find the smallest useful multiple of (30*pos)+a
# on tkc, then I need the index given by the product of [(30*pos)+a][(30*pos)+b]
# in general. If b < a, I need [(30*pos)+a][(30*(pos+1))+b]
d = {}
for x in offsets:
for y in offsets:
res = (x*y) % const
if res in offsets:
d[(x, res)] = y
# another help dictionary: gives tkx calling tmptk[x]
tmptk = {1:tk1, 7:tk7, 11:tk11, 13:tk13, 17:tk17, 19:tk19, 23:tk23, 29:tk29}
pos, prime, lastadded, stop = 0, 0, 0, int(ceil(sqrt(N)))
# inner functions definition
def del_mult(tk, start, step):
for k in xrange(start, len(tk), step):
tk[k] = False
# end of inner functions definition
cpos = const * pos
while prime < stop:
# 30k + 7
if tk7[pos]:
prime = cpos + 7
p.append(prime)
lastadded = 7
for off in offsets:
tmp = d[(7, off)]
start = (pos + prime) if off == 7 else (prime * (const * (pos + 1 if tmp < 7 else 0) + tmp) )//const
del_mult(tmptk[off], start, prime)
# 30k + 11
if tk11[pos]:
prime = cpos + 11
p.append(prime)
lastadded = 11
for off in offsets:
tmp = d[(11, off)]
start = (pos + prime) if off == 11 else (prime * (const * (pos + 1 if tmp < 11 else 0) + tmp) )//const
del_mult(tmptk[off], start, prime)
# 30k + 13
if tk13[pos]:
prime = cpos + 13
p.append(prime)
lastadded = 13
for off in offsets:
tmp = d[(13, off)]
start = (pos + prime) if off == 13 else (prime * (const * (pos + 1 if tmp < 13 else 0) + tmp) )//const
del_mult(tmptk[off], start, prime)
# 30k + 17
if tk17[pos]:
prime = cpos + 17
p.append(prime)
lastadded = 17
for off in offsets:
tmp = d[(17, off)]
start = (pos + prime) if off == 17 else (prime * (const * (pos + 1 if tmp < 17 else 0) + tmp) )//const
del_mult(tmptk[off], start, prime)
# 30k + 19
if tk19[pos]:
prime = cpos + 19
p.append(prime)
lastadded = 19
for off in offsets:
tmp = d[(19, off)]
start = (pos + prime) if off == 19 else (prime * (const * (pos + 1 if tmp < 19 else 0) + tmp) )//const
del_mult(tmptk[off], start, prime)
# 30k + 23
if tk23[pos]:
prime = cpos + 23
p.append(prime)
lastadded = 23
for off in offsets:
tmp = d[(23, off)]
start = (pos + prime) if off == 23 else (prime * (const * (pos + 1 if tmp < 23 else 0) + tmp) )//const
del_mult(tmptk[off], start, prime)
# 30k + 29
if tk29[pos]:
prime = cpos + 29
p.append(prime)
lastadded = 29
for off in offsets:
tmp = d[(29, off)]
start = (pos + prime) if off == 29 else (prime * (const * (pos + 1 if tmp < 29 else 0) + tmp) )//const
del_mult(tmptk[off], start, prime)
# now we go back to top tk1, so we need to increase pos by 1
pos += 1
cpos = const * pos
# 30k + 1
if tk1[pos]:
prime = cpos + 1
p.append(prime)
lastadded = 1
for off in offsets:
tmp = d[(1, off)]
start = (pos + prime) if off == 1 else (prime * (const * pos + tmp) )//const
del_mult(tmptk[off], start, prime)
# time to add remaining primes
# if lastadded == 1, remove last element and start adding them from tk1
# this way we don't need an "if" within the last while
if lastadded == 1:
p.pop()
# now complete for every other possible prime
while pos < len(tk1):
cpos = const * pos
if tk1[pos]: p.append(cpos + 1)
if tk7[pos]: p.append(cpos + 7)
if tk11[pos]: p.append(cpos + 11)
if tk13[pos]: p.append(cpos + 13)
if tk17[pos]: p.append(cpos + 17)
if tk19[pos]: p.append(cpos + 19)
if tk23[pos]: p.append(cpos + 23)
if tk29[pos]: p.append(cpos + 29)
pos += 1
# remove exceeding if present
pos = len(p) - 1
while p[pos] > N:
pos -= 1
if pos < len(p) - 1:
del p[pos+1:]
# return p list
return p
def sieveOfEratosthenes(n):
"""sieveOfEratosthenes(n): return the list of the primes < n."""
# Code from: <dickinsm@gmail.com>, Nov 30 2006
# http://groups.google.com/group/comp.lang.python/msg/f1f10ced88c68c2d
if n <= 2:
return []
sieve = range(3, n, 2)
top = len(sieve)
for si in sieve:
if si:
bottom = (si*si - 3) // 2
if bottom >= top:
break
sieve[bottom::si] = [0] * -((bottom - top) // si)
return [2] + [el for el in sieve if el]
def sieveOfAtkin(end):
"""sieveOfAtkin(end): return a list of all the prime numbers <end
using the Sieve of Atkin."""
# Code by Steve Krenzel, <Sgk284@gmail.com>, improved
# Code: https://web.archive.org/web/20080324064651/http://krenzel.info/?p=83
# Info: http://en.wikipedia.org/wiki/Sieve_of_Atkin
assert end > 0
lng = ((end-1) // 2)
sieve = [False] * (lng + 1)
x_max, x2, xd = int(sqrt((end-1)/4.0)), 0, 4
for xd in xrange(4, 8*x_max + 2, 8):
x2 += xd
y_max = int(sqrt(end-x2))
n, n_diff = x2 + y_max*y_max, (y_max << 1) - 1
if not (n & 1):
n -= n_diff
n_diff -= 2
for d in xrange((n_diff - 1) << 1, -1, -8):
m = n % 12
if m == 1 or m == 5:
m = n >> 1
sieve[m] = not sieve[m]
n -= d
x_max, x2, xd = int(sqrt((end-1) / 3.0)), 0, 3
for xd in xrange(3, 6 * x_max + 2, 6):
x2 += xd
y_max = int(sqrt(end-x2))
n, n_diff = x2 + y_max*y_max, (y_max << 1) - 1
if not(n & 1):
n -= n_diff
n_diff -= 2
for d in xrange((n_diff - 1) << 1, -1, -8):
if n % 12 == 7:
m = n >> 1
sieve[m] = not sieve[m]
n -= d
x_max, y_min, x2, xd = int((2 + sqrt(4-8*(1-end)))/4), -1, 0, 3
for x in xrange(1, x_max + 1):
x2 += xd
xd += 6
if x2 >= end: y_min = (((int(ceil(sqrt(x2 - end))) - 1) << 1) - 2) << 1
n, n_diff = ((x*x + x) << 1) - 1, (((x-1) << 1) - 2) << 1
for d in xrange(n_diff, y_min, -8):
if n % 12 == 11:
m = n >> 1
sieve[m] = not sieve[m]
n += d
primes = [2, 3]
if end <= 3:
return primes[:max(0,end-2)]
for n in xrange(5 >> 1, (int(sqrt(end))+1) >> 1):
if sieve[n]:
primes.append((n << 1) + 1)
aux = (n << 1) + 1
aux *= aux
for k in xrange(aux, end, 2 * aux):
sieve[k >> 1] = False
s = int(sqrt(end)) + 1
if s % 2 == 0:
s += 1
primes.extend([i for i in xrange(s, end, 2) if sieve[i >> 1]])
return primes
def ambi_sieve_plain(n):
s = range(3, n, 2)
for m in xrange(3, int(n**0.5)+1, 2):
if s[(m-3)/2]:
for t in xrange((m*m-3)/2,(n>>1)-1,m):
s[t]=0
return [2]+[t for t in s if t>0]
def sundaram3(max_n):
# https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/2073279#2073279
numbers = range(3, max_n+1, 2)
half = (max_n)//2
initial = 4
for step in xrange(3, max_n+1, 2):
for i in xrange(initial, half, step):
numbers[i-1] = 0
initial += 2*(step+1)
if initial > half:
return [2] + filter(None, numbers)
################################################################################
# Using Numpy:
def ambi_sieve(n):
# http://tommih.blogspot.com/2009/04/fast-prime-number-generator.html
s = np.arange(3, n, 2)
for m in xrange(3, int(n ** 0.5)+1, 2):
if s[(m-3)/2]:
s[(m*m-3)/2::m]=0
return np.r_[2, s[s>0]]
def primesfrom3to(n):
# https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Returns a array of primes, p < n """
assert n>=2
sieve = np.ones(n/2, dtype=np.bool)
for i in xrange(3,int(n**0.5)+1,2):
if sieve[i/2]:
sieve[i*i/2::i] = False
return np.r_[2, 2*np.nonzero(sieve)[0][1::]+1]
def primesfrom2to(n):
# https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
""" Input n>=6, Returns a array of primes, 2 <= p < n """
sieve = np.ones(n/3 + (n%6==2), dtype=np.bool)
sieve[0] = False
for i in xrange(int(n**0.5)/3+1):
if sieve[i]:
k=3*i+1|1
sieve[ ((k*k)/3) ::2*k] = False
sieve[(k*k+4*k-2*k*(i&1))/3::2*k] = False
return np.r_[2,3,((3*np.nonzero(sieve)[0]+1)|1)]
if __name__=='__main__':
import itertools
import sys
def test(f1,f2,num):
print('Testing {f1} and {f2} return same results'.format(
f1=f1.func_name,
f2=f2.func_name))
if not all([a==b for a,b in itertools.izip_longest(f1(num),f2(num))]):
sys.exit("Error: %s(%s) != %s(%s)"%(f1.func_name,num,f2.func_name,num))
n=1000000
test(sieveOfAtkin,sieveOfEratosthenes,n)
test(sieveOfAtkin,ambi_sieve,n)
test(sieveOfAtkin,ambi_sieve_plain,n)
test(sieveOfAtkin,sundaram3,n)
test(sieveOfAtkin,sieve_wheel_30,n)
test(sieveOfAtkin,primesfrom3to,n)
test(sieveOfAtkin,primesfrom2to,n)
test(sieveOfAtkin,rwh_primes,n)
test(sieveOfAtkin,rwh_primes1,n)
test(sieveOfAtkin,rwh_primes2,n)
运行脚本测试所有实现都会得到相同的结果。
相关问题(涉及素数生成器和包括基准):
加快Python中的bitstring / bit操作?
更快,更具记忆力的纯Python代码:
def primes(n):
""" Returns a list of primes < n """
sieve = [True] * n
for i in xrange(3,int(n**0.5)+1,2):
if sieve[i]:
sieve[i*i::2*i]=[False]*((n-i*i-1)/(2*i)+1)
return [2] + [i for i in xrange(3,n,2) if sieve[i]]
或者以半筛子开始
def primes1(n):
""" Returns a list of primes < n """
sieve = [True] * (n/2)
for i in xrange(3,int(n**0.5)+1,2):
if sieve[i/2]:
sieve[i*i/2::i] = [False] * ((n-i*i-1)/(2*i)+1)
return [2] + [2*i+1 for i in xrange(1,n/2) if sieve[i]]
更快,更具记忆力的numpy代码:
import numpy
def primesfrom3to(n):
""" Returns a array of primes, 3 <= p < n """
sieve = numpy.ones(n/2, dtype=numpy.bool)
for i in xrange(3,int(n**0.5)+1,2):
if sieve[i/2]:
sieve[i*i/2::i] = False
return 2*numpy.nonzero(sieve)[0][1::]+1
从筛子的三分之一开始更快的变化:
import numpy
def primesfrom2to(n):
""" Input n>=6, Returns a array of primes, 2 <= p < n """
sieve = numpy.ones(n/3 + (n%6==2), dtype=numpy.bool)
for i in xrange(1,int(n**0.5)/3+1):
if sieve[i]:
k=3*i+1|1
sieve[ k*k/3 ::2*k] = False
sieve[k*(k-2*(i&1)+4)/3::2*k] = False
return numpy.r_[2,3,((3*numpy.nonzero(sieve)[0][1:]+1)|1)]
上面代码的一个(难以编码的)纯python版本将是:
def primes2(n):
""" Input n>=6, Returns a list of primes, 2 <= p < n """
n, correction = n-n%6+6, 2-(n%6>1)
sieve = [True] * (n/3)
for i in xrange(1,int(n**0.5)/3+1):
if sieve[i]:
k=3*i+1|1
sieve[ k*k/3 ::2*k] = [False] * ((n/6-k*k/6-1)/k+1)
sieve[k*(k-2*(i&1)+4)/3::2*k] = [False] * ((n/6-k*(k-2*(i&1)+4)/6-1)/k+1)
return [2,3] + [3*i+1|1 for i in xrange(1,n/3-correction) if sieve[i]]
不幸的是,pure-python并没有采用更简单,更快速的numpy方式来执行Assignment,并在循环内调用len(),如[False]*len(sieve[((k*k)/3)::2*k])
太慢了。 所以我不得不即兴改正输入(并避免更多的数学),并做一些极端(和痛苦)的数学魔法。
就个人而言,我认为numpy(这是如此广泛使用的)不是python标准库的一部分(在python 3发布之后2年并且没有numpy兼容性)并且在语法和速度方面的改进似乎完全被忽略python开发者。
Python Cookbook中有一个非常简洁的示例 - 在该URL上提出的最快版本是:
import itertools
def erat2( ):
D = { }
yield 2
for q in itertools.islice(itertools.count(3), 0, None, 2):
p = D.pop(q, None)
if p is None:
D[q*q] = q
yield q
else:
x = p + q
while x in D or not (x&1):
x += p
D[x] = p
所以这会给
def get_primes_erat(n):
return list(itertools.takewhile(lambda p: p<n, erat2()))
使用pri.py中的代码在shell提示符处进行测量(我喜欢这样做),我观察到:
$ python2.5 -mtimeit -s'import pri' 'pri.get_primes(1000000)'
10 loops, best of 3: 1.69 sec per loop
$ python2.5 -mtimeit -s'import pri' 'pri.get_primes_erat(1000000)'
10 loops, best of 3: 673 msec per loop
所以它看起来就像Cookbook解决方案的速度一样快了两倍。
链接地址: http://www.djcxy.com/p/15199.html