class SummingThread(threading.Thread):
def __init__(self, low, high):
super(SummingThread, self).__init__()
self.low = low
self.high = high
self.total = 0
def run(self):
for i in range(self.low, self.high):
self.total += i
thread1 = SummingThread(0, 500000)
thread2 = SummingThread(500000, 1000000)
thread1.start() # This actually causes the thread to run
thread2.start()
thread1.join() # This waits until the thread has completed
thread2.join()
# At this point, both threads have completed
result = thread1.total + thread2.total
print(result)
def do_threaded_work(work_items, work_func, num_threads=None, per_sync_timeout=1, preserve_result_ordering=True):
""" Executes work_func on each work_item. Note: Execution order is not preserved, but output ordering is (optionally).
Parameters:
- num_threads Default: len(work_items) --- Number of threads to use process items in work_items.
- per_sync_timeout Default: 1 --- Each synchronized operation can optionally timeout.
- preserve_result_ordering Default: True --- Reorders result_item to match original work_items ordering.
Return:
--- list of results from applying work_func to each work_item. Order is optionally preserved.
Example:
def process_url(url):
# TODO: Do some work with the url
return url
index = 0
for work_item in work_items:
if preserve_result_ordering:
work_queue.put((index, work_item))
else:
work_queue.put(work_item)
index += 1
if preserve_result_ordering:
wrapped_work_func = lambda work_item: (work_item[0], work_func(work_item[1]))
start_logging_with_thread_info()
#spawn a pool of threads, and pass them queue instance
for _ in range(num_threads):
if preserve_result_ordering:
t = ThreadedWorker(work_queue, result_queue, work_func=wrapped_work_func, queue_timeout=per_sync_timeout)
else:
t = ThreadedWorker(work_queue, result_queue, work_func=work_func, queue_timeout=per_sync_timeout)
t.setDaemon(True)
t.start()
work_queue.join()
stop_logging_with_thread_info()
logging.info('work_queue joined')
result_items = []
while not result_queue.empty():
result = result_queue.get(timeout=per_sync_timeout)
logging.info('found result[:500]: ' + repr(result)[:500])
if result:
result_items.append(result)
if preserve_result_ordering:
result_items = [work_item for index, work_item in result_items]
return result_items
class ThreadedWorker(threading.Thread):
""" Generic Threaded Worker
Input to work_func: item from work_queue
def process_url(url):
# TODO: Do some work with the url
return url
def main():
# spawn a pool of threads, and pass them queue instance
for i in range(3):
t = ThreadedWorker(work_queue, result_queue, work_func=process_url)
t.setDaemon(True)
t.start()
# populate queue with data
for url in urls_to_process:
work_queue.put(url)
# wait on the queue until everything has been processed
work_queue.join()
def process_url(url):
# TODO: Do some work with the url
return url
def main():
# spawn a pool of threads, and pass them queue instance
for i in range(5):
t = ThreadedWorker(work_queue, result_queue, work_func=process_url)
t.setDaemon(True)
t.start()
# populate queue with data
for url in urls_to_process:
work_queue.put(url)
# wait on the queue until everything has been processed
work_queue.join()