Refactor tasklist for easier integration by taskoverview

2025-12-13 13:30:23 +00:00 · 2014-05-10 17:24:50 +02:00 · 2014-05-10 17:24:50 +02:00 · 02c683120a
commit 02c683120a
parent f0e5be0ded
8 changed files with 284 additions and 198 deletions
--- a/bootstrapvz/base/main.py
+++ b/bootstrapvz/base/main.py
@ -63,10 +63,11 @@ def run(opts):
 	manifest = Manifest(opts['MANIFEST'])

 	# Get the tasklist
+	from tasklist import load_tasks
 	from tasklist import TaskList
-	tasklist = TaskList()
+	tasks = load_tasks('resolve_tasks', manifest)
+	tasklist = TaskList(tasks)
 	# 'resolve_tasks' is the name of the function to call on the provider and plugins
-	tasklist.load('resolve_tasks', manifest)

 	# Create the bootstrap information object that'll be used throughout the bootstrapping process
 	from bootstrapinfo import BootstrapInformation
@ -85,23 +86,23 @@ def run(opts):
 			raw_input('Press Enter to commence rollback')
 		log.error('Rolling back')

-		# Create a new tasklist to gather the necessary tasks for rollback
-		rollback_tasklist = TaskList()
-
 		# Create a useful little function for the provider and plugins to use,
 		# when figuring out what tasks should be added to the rollback list.
-		def counter_task(task, counter):
+		def counter_task(taskset, task, counter):
 			"""counter_task() adds ``counter`` to ``taskset`` if ``task`` is present
 			in the list of completed tasks and ``counter`` has not been completed itself

+			:param set taskset: The taskset to add the rollback task to
 			:param Task task: The task to look for in the completed tasks list
 			:param Task counter: The task to add to the rollback tasklist
 			"""
 			if task in tasklist.tasks_completed and counter not in tasklist.tasks_completed:
-				rollback_tasklist.tasks.add(counter)
+				taskset.add(counter)
+
 		# Ask the provider and plugins for tasks they'd like to add to the rollback tasklist
 		# Any additional arguments beyond the first two are passed directly to the provider and plugins
-		rollback_tasklist.load('resolve_rollback_tasks', manifest, tasklist.tasks_completed, counter_task)
+		rollback_tasks = load_tasks('resolve_rollback_tasks', manifest, tasklist.tasks_completed, counter_task)
+		rollback_tasklist = TaskList(rollback_tasks)

 		# Run the rollback tasklist
 		rollback_tasklist.run(info=bootstrap_info, dry_run=opts['--dry-run'])
--- a/bootstrapvz/base/tasklist.py
+++ b/bootstrapvz/base/tasklist.py
@ -11,28 +11,10 @@ class TaskList(object):
 	and orders them according to their dependencies.
 	"""

-	def __init__(self):
-		self.tasks = set()
+	def __init__(self, tasks):
+		"""
+		:param set tasks: The tasks that this tasklist should order and run
+		"""
+		self.tasks = tasks
+		# run() appends every task to this list once it has been run
 		self.tasks_completed = []

-	def load(self, function, manifest, *args):
-		"""Calls 'function' on the provider and all plugins that have been loaded by the manifest.
-		Any additional arguments are passed directly to 'function'.
-		The function that is called shall accept the taskset as its first argument and the manifest
-		as its second argument.
-
-		:param str function: Name of the function to call
-		:param Manifest manifest: The manifest
-		:param list *args: Additional arguments that should be passed to the function that is called
-		"""
-		# Call 'function' on the provider
-		getattr(manifest.modules['provider'], function)(self.tasks, manifest, *args)
-		for plugin in manifest.modules['plugins']:
-			# Plugins are not required to have whatever function we call
-			fn = getattr(plugin, function, None)
-			if callable(fn):
-				fn(self.tasks, manifest, *args)
-
 	def run(self, info, dry_run=False):
 		"""Converts the taskgraph into a list and runs all tasks in that list

@ -40,7 +22,7 @@ class TaskList(object):
 		:param bool dry_run: Whether to actually run the tasks or simply step through them
 		"""
 		# Create a list for us to run
-		task_list = self.create_list()
+		task_list = create_list(self.tasks)
 		# Output the tasklist
 		log.debug('Tasklist:\n\t' + ('\n\t'.join(map(repr, task_list))))

@ -57,185 +39,212 @@ class TaskList(object):
 			# Remember which tasks have been run for later use (e.g. when rolling back, because of an error)
 			self.tasks_completed.append(task)

-	def create_list(self):
-		"""Creates a list of all the tasks that should be run.
-		"""
-		from bootstrapvz.common.phases import order
-		# Get a hold of all tasks
-		tasks = self.get_all_tasks()
-		# Make sure the taskset is a subset of all the tasks we have gathered
-		self.tasks.issubset(tasks)
-		# Create a graph over all tasks by creating a map of each tasks successors
-		graph = {}
-		for task in tasks:
-			# Do a sanity check first
-			self.check_ordering(task)
-			successors = set()
-			# Add all successors mentioned in the task
-			successors.update(task.successors)
-			# Add all tasks that mention this task as a predecessor
-			successors.update(filter(lambda succ: task in succ.predecessors, tasks))
-			# Create a list of phases that succeed the phase of this task
-			succeeding_phases = order[order.index(task.phase) + 1:]
-			# Add all tasks that occur in above mentioned succeeding phases
-			successors.update(filter(lambda succ: succ.phase in succeeding_phases, tasks))
-			# Map the successors to the task
-			graph[task] = successors

-		# Use the strongly connected components algorithm to check for cycles in our task graph
-		components = self.strongly_connected_components(graph)
-		cycles_found = 0
-		for component in components:
-			# Node of 1 is also a strongly connected component but hardly a cycle, so we filter them out
-			if len(component) > 1:
-				cycles_found += 1
-				log.debug('Cycle: {list}\n' + (', '.join(map(repr, component))))
-		if cycles_found > 0:
-			msg = ('{num} cycles were found in the tasklist, '
-			       'consult the logfile for more information.'.format(num=cycles_found))
+def load_tasks(function, manifest, *args):
+	"""Calls ``function`` on the provider and all plugins that have been loaded by the manifest.
+	Any additional arguments are passed directly to ``function``.
+	The function that is called shall accept the taskset as its first argument and the manifest
+	as its second argument.
+
+	:param str function: Name of the function to call
+	:param Manifest manifest: The manifest
+	:param list args: Additional arguments that should be passed to the function that is called
+	:return: The taskset after the provider and the plugins have been called with it
+	:rtype: set
+	"""
+	# Start out with an empty taskset, it is handed to the provider and plugins for them to fill
+	# (whatever the called functions return is ignored)
+	tasks = set()
+	# Call 'function' on the provider
+	# Unlike for plugins there is no fallback here: a provider lacking 'function' raises an AttributeError
+	getattr(manifest.modules['provider'], function)(tasks, manifest, *args)
+	for plugin in manifest.modules['plugins']:
+		# Plugins are not required to have whatever function we call
+		fn = getattr(plugin, function, None)
+		if callable(fn):
+			fn(tasks, manifest, *args)
+	return tasks
+
+
+def create_list(subset):
+	"""Creates a list of all the tasks that should be run.
+
+	All task classes in the package are gathered and turned into a graph that maps each task
+	to its successors. The graph is checked for cycles, sorted topologically and finally
+	reduced to the tasks that are present in ``subset``.
+
+	:param set subset: The tasks that should be run
+	:return: The tasks from ``subset`` in the order they should be run in
+	:rtype: list
+	:raises TaskListError: If ``subset`` contains tasks that were not found in the package,
+	                       or if the task graph contains cycles
+	"""
+	from bootstrapvz.common.phases import order
+	# Get a hold of all tasks (materialized as a list, since we iterate over them repeatedly)
+	tasks = list(get_all_tasks())
+	# Make sure the taskset is a subset of all the tasks we have gathered.
+	# set.issubset() merely returns a boolean, so the outcome has to be acted upon explicitly;
+	# otherwise unknown tasks would silently be dropped by the filter at the end of this function.
+	unknown_tasks = set(subset).difference(tasks)
+	if unknown_tasks:
+		msg = ('The following tasks were not found among the tasks in the package: {tasks}'
+		       .format(tasks=', '.join(map(repr, unknown_tasks))))
+		raise TaskListError(msg)
+	# Create a graph over all tasks by creating a map of each tasks successors
+	graph = {}
+	for task in tasks:
+		# Do a sanity check first
+		check_ordering(task)
+		successors = set()
+		# Add all successors mentioned in the task
+		successors.update(task.successors)
+		# Add all tasks that mention this task as a predecessor
+		successors.update(succ for succ in tasks if task in succ.predecessors)
+		# Create a list of phases that succeed the phase of this task
+		succeeding_phases = order[order.index(task.phase) + 1:]
+		# Add all tasks that occur in above mentioned succeeding phases
+		successors.update(succ for succ in tasks if succ.phase in succeeding_phases)
+		# Map the successors to the task
+		graph[task] = successors
+
+	# Use the strongly connected components algorithm to check for cycles in our task graph
+	components = strongly_connected_components(graph)
+	cycles_found = 0
+	for component in components:
+		# Node of 1 is also a strongly connected component but hardly a cycle, so we filter them out
+		if len(component) > 1:
+			cycles_found += 1
+			# Actually fill in the placeholder, it used to be logged as the literal text '{list}'
+			log.debug('Cycle: {list}'.format(list=', '.join(map(repr, component))))
+	if cycles_found > 0:
+		msg = ('{num} cycles were found in the tasklist, '
+		       'consult the logfile for more information.'.format(num=cycles_found))
+		raise TaskListError(msg)
+
+	# Run a topological sort on the graph, returning an ordered list
+	sorted_tasks = topological_sort(graph)
+
+	# Filter out any tasks not in the tasklist
+	# We want to maintain ordering, so we don't use set intersection
+	return [task for task in sorted_tasks if task in subset]
+
+
+def get_all_tasks():
+	"""Gets a list of all task classes in the package
+
+	:return: A list of all tasks in the package
+	:rtype: list
+	"""
+	# Get a generator that returns all classes in the package
+	import os.path
+	# The package path is the parent of the directory this file resides in
+	pkg_path = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
+	classes = get_all_classes(pkg_path, 'bootstrapvz.')
+
+	# Predicate to check whether a class is a task (excluding the superclass Task)
+	def is_task(obj):
+		from task import Task
+		return issubclass(obj, Task) and obj is not Task
+	return filter(is_task, classes)  # Only return classes that are tasks
+
+
+def get_all_classes(path=None, prefix=''):
+	""" Given a path to a package, this function retrieves all the classes in it
+
+	:param str path: Path to the package
+	:param str prefix: Name of the package followed by a dot
+	:return: A generator that yields classes
+	:rtype: generator
+	:raises Exception: If a module cannot be inspected.
+	"""
+	import pkgutil
+	import importlib
+	import inspect
+
+	# Handed to walk_packages() as its error callback, so that a failure results in an exception
+	def walk_error(module):
+		raise Exception('Unable to inspect module ' + module)
+	walker = pkgutil.walk_packages([path], prefix, walk_error)
+	for _, module_name, _ in walker:
+		# Every module that is found gets imported, so that its members can be inspected
+		module = importlib.import_module(module_name)
+		classes = inspect.getmembers(module, inspect.isclass)
+		for class_name, obj in classes:
+			# We only want classes that are defined in the module, and not imported ones
+			if obj.__module__ == module_name:
+					yield obj
+
+
+def check_ordering(task):
+	"""Checks the ordering of a task in relation to other tasks and their phases.
+
+	This function checks for a subset of what the strongly connected components algorithm does,
+	but can deliver a more precise error message, namely that there is a conflict between
+	what a task has specified as its predecessors or successors and in which phase it is placed.
+
+	:param Task task: The task to check the ordering for
+	:raises TaskListError: If there is a conflict between task precedence and phase precedence
+	"""
+	# NOTE(review): the '<' and '>' comparisons below presumably order phases by
+	# their position in the bootstrapping process - confirm against the phase implementation
+	for successor in task.successors:
+		# Run through all successors and check whether the phase of the task
+		# comes before the phase of a successor
+		if task.phase > successor.phase:
+			msg = ("The task {task} is specified as running before {other}, "
+			       "but its phase '{phase}' lies after the phase '{other_phase}'"
+			       .format(task=task, other=successor, phase=task.phase, other_phase=successor.phase))
+			raise TaskListError(msg)
+	for predecessor in task.predecessors:
+		# Run through all predecessors and check whether the phase of the task
+		# comes after the phase of a predecessor
+		if task.phase < predecessor.phase:
+			msg = ("The task {task} is specified as running after {other}, "
+			       "but its phase '{phase}' lies before the phase '{other_phase}'"
+			       .format(task=task, other=predecessor, phase=task.phase, other_phase=predecessor.phase))
 			raise TaskListError(msg)

-		# Run a topological sort on the graph, returning an ordered list
-		sorted_tasks = self.topological_sort(graph)

-		# Filter out any tasks not in the tasklist
-		# We want to maintain ordering, so we don't use set intersection
-		sorted_tasks = filter(lambda task: task in self.tasks, sorted_tasks)
-		return sorted_tasks
+def strongly_connected_components(graph):
+	"""Find the strongly connected components in a graph using Tarjan's algorithm.

-	def get_all_tasks(self):
-		"""Gets a list of all task classes in the package
+	Source: http://www.logarithmic.net/pfh-files/blog/01208083168/sort.py

-		:return: A list of all tasks in the package
-		:rtype: list
-		"""
-		# Get a generator that returns all classes in the package
-		import os.path
-		pkg_path = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
-		classes = self.get_all_classes(pkg_path, 'bootstrapvz.')
+	:param dict graph: mapping of tasks to lists of successor tasks
+	:return: List of tuples that are strongly connected components
+	:rtype: list
+	"""

-		# lambda function to check whether a class is a task (excluding the superclass Task)
-		def is_task(obj):
-			from task import Task
-			return issubclass(obj, Task) and obj is not Task
-		return filter(is_task, classes)  # Only return classes that are tasks
+	result = []
+	stack = []
+	low = {}

-	def get_all_classes(self, path=None, prefix=''):
-		""" Given a path to a package, this function retrieves all the classes in it
+	def visit(node):
+		if node in low:
+			return

-		:param str path: Path to the package
-		:param str prefix: Name of the package followed by a dot
-		:return: A generator that yields classes
-		:rtype: generator
-		:raises Exception: If a module cannot be inspected.
-		"""
-		import pkgutil
-		import importlib
-		import inspect
+		num = len(low)
+		low[node] = num
+		stack_pos = len(stack)
+		stack.append(node)

-		def walk_error(module):
-			raise Exception('Unable to inspect module ' + module)
-		walker = pkgutil.walk_packages([path], prefix, walk_error)
-		for _, module_name, _ in walker:
-			module = importlib.import_module(module_name)
-			classes = inspect.getmembers(module, inspect.isclass)
-			for class_name, obj in classes:
-				# We only want classes that are defined in the module, and not imported ones
-				if obj.__module__ == module_name:
-						yield obj
+		for successor in graph[node]:
+			visit(successor)
+			low[node] = min(low[node], low[successor])

-	def check_ordering(self, task):
-		"""Checks the ordering of a task in relation to other tasks and their phases.
+		if num == low[node]:
+			component = tuple(stack[stack_pos:])
+			del stack[stack_pos:]
+			result.append(component)
+			for item in component:
+				low[item] = len(graph)

-		This function checks for a subset of what the strongly connected components algorithm does,
-		but can deliver a more precise error message, namely that there is a conflict between
-		what a task has specified as its predecessors or successors and in which phase it is placed.
+	for node in graph:
+		visit(node)

-		:param Task task: The task to check the ordering for
-		:raises TaskListError: If there is a conflict between task precedence and phase precedence
-		"""
-		for successor in task.successors:
-			# Run through all successors and check whether the phase of the task
-			# comes before the phase of a successor
-			if task.phase > successor.phase:
-				msg = ("The task {task} is specified as running before {other}, "
-				       "but its phase '{phase}' lies after the phase '{other_phase}'"
-				       .format(task=task, other=successor, phase=task.phase, other_phase=successor.phase))
-				raise TaskListError(msg)
-		for predecessor in task.predecessors:
-			# Run through all predecessors and check whether the phase of the task
-			# comes after the phase of a predecessor
-			if task.phase < predecessor.phase:
-				msg = ("The task {task} is specified as running after {other}, "
-				       "but its phase '{phase}' lies before the phase '{other_phase}'"
-				       .format(task=task, other=predecessor, phase=task.phase, other_phase=predecessor.phase))
-				raise TaskListError(msg)
+	return result

-	def strongly_connected_components(self, graph):
-		"""Find the strongly connected components in a graph using Tarjan's algorithm.

-		Source: http://www.logarithmic.net/pfh-files/blog/01208083168/sort.py
+def topological_sort(graph):
+	"""Runs a topological sort on a graph.

-		:param dict graph: mapping of tasks to lists of successor tasks
-		:return: List of tuples that are strongly connected comoponents
-		:rtype: list
-		"""
+	Source: http://www.logarithmic.net/pfh-files/blog/01208083168/sort.py

-		result = []
-		stack = []
-		low = {}
+	:param dict graph: mapping of tasks to lists of successor tasks
+	:return: A list of all tasks in the graph sorted according to their dependencies
+	:rtype: list
+	"""
+	count = {}
+	for node in graph:
+		count[node] = 0
+	for node in graph:
+		for successor in graph[node]:
+			count[successor] += 1

-		def visit(node):
-			if node in low:
-				return
+	ready = [node for node in graph if count[node] == 0]

-			num = len(low)
-			low[node] = num
-			stack_pos = len(stack)
-			stack.append(node)
+	result = []
+	while ready:
+		node = ready.pop(-1)
+		result.append(node)

-			for successor in graph[node]:
-				visit(successor)
-				low[node] = min(low[node], low[successor])
+		for successor in graph[node]:
+			count[successor] -= 1
+			if count[successor] == 0:
+				ready.append(successor)

-			if num == low[node]:
-				component = tuple(stack[stack_pos:])
-				del stack[stack_pos:]
-				result.append(component)
-				for item in component:
-					low[item] = len(graph)
-
-		for node in graph:
-			visit(node)
-
-		return result
-
-	def topological_sort(self, graph):
-		"""Runs a topological sort on a graph.
-
-		Source: http://www.logarithmic.net/pfh-files/blog/01208083168/sort.py
-
-		:param dict graph: mapping of tasks to lists of successor tasks
-		:return: A list of all tasks in the graph sorted according to ther dependencies
-		:rtype: list
-		"""
-		count = {}
-		for node in graph:
-			count[node] = 0
-		for node in graph:
-			for successor in graph[node]:
-				count[successor] += 1
-
-		ready = [node for node in graph if count[node] == 0]
-
-		result = []
-		while ready:
-			node = ready.pop(-1)
-			result.append(node)
-
-			for successor in graph[node]:
-				count[successor] -= 1
-				if count[successor] == 0:
-					ready.append(successor)
-
-		return result
+	return result
--- a/bootstrapvz/plugins/minimize_size/init.py
+++ b/bootstrapvz/plugins/minimize_size/init.py
@ -22,4 +22,4 @@ def resolve_tasks(taskset, manifest):


 def resolve_rollback_tasks(taskset, manifest, completed, counter_task):
-	counter_task(tasks.AddFolderMounts, tasks.RemoveFolderMounts)
+	counter_task(taskset, tasks.AddFolderMounts, tasks.RemoveFolderMounts)
--- a/bootstrapvz/plugins/prebootstrapped/init.py
+++ b/bootstrapvz/plugins/prebootstrapped/init.py
@ -52,6 +52,6 @@ def resolve_tasks(taskset, manifest):

 def resolve_rollback_tasks(taskset, manifest, completed, counter_task):
 	if manifest.volume['backing'] == 'ebs':
-		counter_task(CreateFromSnapshot, volume.Delete)
+		counter_task(taskset, CreateFromSnapshot, volume.Delete)
 	else:
-		counter_task(CreateFromImage, volume.Delete)
+		counter_task(taskset, CreateFromImage, volume.Delete)
--- a/bootstrapvz/plugins/vagrant/init.py
+++ b/bootstrapvz/plugins/vagrant/init.py
@ -31,4 +31,4 @@ def resolve_tasks(taskset, manifest):


 def resolve_rollback_tasks(taskset, manifest, completed, counter_task):
-	counter_task(tasks.CreateVagrantBoxDir, tasks.RemoveVagrantBoxDir)
+	counter_task(taskset, tasks.CreateVagrantBoxDir, tasks.RemoveVagrantBoxDir)
--- a/bootstrapvz/providers/ec2/init.py
+++ b/bootstrapvz/providers/ec2/init.py
@ -113,6 +113,6 @@ def resolve_tasks(taskset, manifest):

 def resolve_rollback_tasks(taskset, manifest, completed, counter_task):
 	taskset.update(task_groups.get_standard_rollback_tasks(completed))
-	counter_task(tasks.ebs.Create, volume.Delete)
-	counter_task(tasks.ebs.Attach, volume.Detach)
-	counter_task(tasks.ami.BundleImage, tasks.ami.RemoveBundle)
+	counter_task(taskset, tasks.ebs.Create, volume.Delete)
+	counter_task(taskset, tasks.ebs.Attach, volume.Detach)
+	counter_task(taskset, tasks.ami.BundleImage, tasks.ami.RemoveBundle)
--- a/docs/_static/graph.json
+++ b/docs/_static/graph.json
--- a/taskoverview.py
+++ b/taskoverview.py
@ -0,0 +1,76 @@
+#!/usr/bin/python
+
+
+def main(opts):
+	"""Gathers all tasks in bootstrapvz together with their relations and writes that overview as JSON
+
+	:param dict opts: The options, only '--output' is read (the path to write to, stdout when absent)
+	"""
+	from bootstrapvz.base.tasklist import get_all_tasks
+	tasks = get_all_tasks()
+
+	# Removes duplicates from a sequence while keeping the order of first occurrence
+	# (set.add() returns None, so 'not seen.add(x)' is always true and only records x as seen)
+	def distinct(seq):
+		seen = set()
+		return [x for x in seq if x not in seen and not seen.add(x)]
+	modules = distinct([task.__module__ for task in tasks])
+	# Every link runs from the task that comes first ('source') to the task that comes after it ('target'),
+	# 'definer' is the task that declares the relation
+	task_links = []
+	task_links.extend([{'source': task,
+	                    'target': succ,
+	                    'definer': task,
+	                    }
+	                   for task in tasks
+	                   for succ in task.successors])
+	task_links.extend([{'source': pre,
+	                    'target': task,
+	                    'definer': task,
+	                    }
+	                   for task in tasks
+	                   for pre in task.predecessors])
+
+	def mk_phase(phase):
+		return {'name': phase.name,
+		        'description': phase.description,
+		        }
+
+	def mk_module(module):
+		return {'name': module,
+		        }
+
+	from bootstrapvz.common import phases
+
+	# Modules and phases are referred to by their index in the 'modules' and 'phases' lists of the output
+	def mk_node(task):
+		return {'name': task.__name__,
+		        'module': modules.index(task.__module__),
+		        'phase': (i for i, phase in enumerate(phases.order) if phase is task.phase).next(),
+		        }
+
+	# Replaces the task references of a link (in place) with their index in the list of tasks
+	def mk_link(link):
+		for key in ['source', 'target', 'definer']:
+			link[key] = tasks.index(link[key])
+		return link
+
+	data = {'phases': map(mk_phase, phases.order),
+	        'modules': map(mk_module, modules),
+	        'nodes': map(mk_node, tasks),
+	        'links': map(mk_link, task_links)}
+
+	write_data(data, opts.get('--output', None))
+
+
+def write_data(data, output_path=None):
+	"""Serializes data as JSON, either to a file or to stdout
+
+	:param data: The data to serialize
+	:param str output_path: Path of the file to write to, stdout is used when this is None
+	"""
+	import json
+	if output_path is None:
+		import sys
+		# Nowhere to write to was specified, so pretty-print to stdout
+		json.dump(data, sys.stdout, indent=4, separators=(',', ': '))
+	else:
+		# Unlike the stdout output, the file is written without indentation
+		with open(output_path, 'w') as output:
+			json.dump(data, output)
+
+
+# Only run when this file is executed as a script outside of a package
+if __name__ == '__main__' and __package__ is None:
+	from docopt import docopt
+	# NOTE(review): docopt's documentation asks for two spaces between an option and its description,
+	# '--output <path> output' only has one - confirm that '--output' is parsed as taking an argument
+	usage = """Usage: taskoverview.py [options]
+
+  Options:
+    --output <path> output
+    -h, --help           show this help
+"""
+	opts = docopt(usage)
+
+	main(opts)