diff --git a/README.MD b/README.MD new file mode 100644 index 0000000..ce9b727 --- /dev/null +++ b/README.MD @@ -0,0 +1,68 @@ +# `dagger` - tools for library dependency graph introspection +Dagger contains tools which help developers understand MongoDB's build dependency graph. Currently, this repository contains: + + 1. A query repl for asking various build dependency questions. + 2. A visualizer for exploring the graph. + +## Usage +Do a MongoDB build, either locally or via Evergreen, on an OSX or Linux host, and generate the `library_dependency_graph.json` file. By default the tool generates this file in the root of your mongo directory; for an Evergreen build, download it from Evergreen. + +``` +scons dagger +``` + +In the root of the Dagger tool directory, install the virtualenv: + +``` +make install +``` + +Activate the virtualenv: + +``` +eval $(make activate) +``` + +Now dagger is ready to use, and we can kick off the visualizer or the query repl. + +To kick off the query repl: + +``` +dagger repl /path/to/json/file +``` + +# Queries + +Now your query engine repl is kicked off. 
+ +To see all available queries: +``` +help +``` + +To see documentation for a query: +``` +help query_name +``` + + +# Examples +Find the explicit direct library dependencies +``` +get_explicit_lib_deps build/opt/mongo/transport/libtransport_layer_common.a +``` + +Find implicit library dependencies +``` +get_implicit_lib_deps build/opt/mongo/transport/libtransport_layer_common.a +``` + +Find the paths from LibA to LibB, or from EXE to LIB + +``` +get_link_paths build/opt/mongo/mongod build/opt/mongo/unittest/libunittest.a +``` +Find the symbol leaks in a library (the symbols that a library implicitly needs but are not satisfied by its explicit direct dependencies) +``` +symbol_leaks build/opt/mongo/unittest/libunittest.a +``` diff --git a/dagger/__init__.py b/dagger/__init__.py index e69de29..776e8a8 100644 --- a/dagger/__init__.py +++ b/dagger/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1-pre" diff --git a/dagger/cli.py b/dagger/cli.py index 750312f..9359fad 100644 --- a/dagger/cli.py +++ b/dagger/cli.py @@ -5,17 +5,17 @@ import libgiza.error import dagger.config.cli -import dagger.operations.hello_world import dagger.operations.version +import dagger.operations.repl import dagger.operations.viz logger = logging.getLogger("dagger.cli") commands = { "main": [ - dagger.operations.hello_world.main, dagger.operations.version.main, - dagger.operations.viz.main, + dagger.operations.repl.main, + dagger.operations.viz.main ], } diff --git a/dagger/config/cli.py b/dagger/config/cli.py index 7fd83b6..f963653 100644 --- a/dagger/config/cli.py +++ b/dagger/config/cli.py @@ -1,3 +1,4 @@ +import os import sys import logging @@ -52,3 +53,32 @@ def level(self, value): root_logger.setLevel(levels[value]) self.state['level'] = value + + @property + def path(self): + if 'path' in self.state: + return self.state['path'] + else: + raise KeyError("Path is not set") + + @path.setter + def path(self, value): + if isinstance(value, basestring): + if os.path.isfile(value): + 
self.state['path'] = value + else: + raise ValueError("Path must be a valid file") + else: + raise TypeError("Path must be a string corresponding to a file name") + + + + + + + + + + + + diff --git a/dagger/graph.py b/dagger/graph.py new file mode 100644 index 0000000..5ebe6f4 --- /dev/null +++ b/dagger/graph.py @@ -0,0 +1,593 @@ +import sys +import logging +import abc +import json +import copy + +import graph_consts + +if sys.version_info >= (3, 0): + basestring = str + +class Graph(object): + """Graph class for storing the build dependency graph. The graph stores the + directed edges as a nested dict of { RelationshipType: {From_Node: Set of + connected nodes}} and nodes as a dict of {nodeid : nodeobject}. Can be + imported from a pickle or JSON file. + """ + + def __init__(self, input=None): + """ + A graph can be initialized with a .json file, graph object, or with no args + """ + if isinstance(input, basestring): + if input.endswith('.json'): + with open(input, 'r') as f: + data = json.load(f, encoding="ascii") + nodes = {} + should_fail = False + + for node in data["nodes"]: + id = str(node["id"]) + try: + nodes[id] = node_factory(id, int(node["node"]["type"]), + dict_source=node["node"]) + except Exception as e: + logging.warning("Malformed Data: " + id) + should_fail = True + + if should_fail is True: + raise ValueError("json nodes are malformed") + + edges = {} + + for edge in data["edges"]: + if edge["type"] not in edges: + edges[edge["type"]] = {} + + to_edges = set([str(e["id"]) for e in edge["to_node"]]) + edges[edge["type"]][edge["from_node"]["id"]] = to_edges + + self._nodes = nodes + self._edges = edges + elif isinstance(input, Graph): + self._nodes = input.nodes + self._edges = input.edges + else: + self._nodes = {} + self._edges = {} + for rel in graph_consts.RELATIONSHIP_TYPES: + self._edges[rel] = {} + + @property + def nodes(self): + """We want to ensure that we are not able to mutate + the nodes or edges properties outside of the specified 
adder methods + """ + return copy.deepcopy(self._nodes) + + @property + def edges(self): + return copy.deepcopy(self._edges) + + @nodes.setter + def nodes(self, value): + if isinstance(value,dict): + self._nodes = value + else: + raise TypeError("Nodes must be a dict") + + @edges.setter + def edges(self, value): + if isinstance(value, dict): + self._edges = value + else: + raise TypeError("Edges must be a dict") + + def get_node(self, id): + return self._nodes.get(id) + + def find_node(self, id, type): + """returns the node if it exists, otherwise, generates + it""" + if self.get_node(id) is not None: + return self.get_node(id) + else: + node = node_factory(id, type) + self.add_node(node) + return node + + def get_edge_type(self, edge_type): + return self._edges[edge_type] + + def add_node(self, node): + if not isinstance(node, NodeInterface): + raise TypeError + + if node.id in self._nodes: + raise ValueError + + self._nodes[node.id] = node + + def add_edge(self, relationship, from_node, to_node): + if relationship not in graph_consts.RELATIONSHIP_TYPES: + raise TypeError + + from_node_obj = self.get_node(from_node) + to_node_obj = self.get_node(to_node) + + if from_node not in self._edges[relationship]: + self._edges[relationship][from_node] = set() + + if any(item is None for item in (from_node, to_node, from_node_obj, to_node_obj)): + raise ValueError + + self._edges[relationship][from_node].add(to_node) + + to_node_obj.add_incoming_edges(from_node_obj, self) + + # JSON does not support python sets, so we need to convert each + # set of edges to lists + def export_to_json(self, filename="graph.json"): + node_index = {} + + data = {"edges": [], "nodes": []} + + for idx, id in enumerate(self._nodes.keys()): + node = self.get_node(id) + node_index[id] = idx + node_dict = {} + node_dict["index"] = idx + node_dict["id"] = id + node_dict["node"] = {} + + for property, value in vars(node).iteritems(): + if isinstance(value, set): + node_dict["node"][property] = 
list(value) + else: + node_dict["node"][property] = value + + data["nodes"].append(node_dict) + + for edge_type in graph_consts.RELATIONSHIP_TYPES: + edges_dict = self._edges[edge_type] + for node in edges_dict.keys(): + to_nodes = list(self._edges[edge_type][node]) + to_nodes_dicts = [{"index": node_index[to_node], "id": to_node} + for to_node in to_nodes] + + data["edges"].append({"type": edge_type, + "from_node": {"id": node, + "index": node_index[node]}, + "to_node": to_nodes_dicts}) + + with open(filename, 'w') as outfile: + json.dump(data, outfile, indent=4, encoding="ascii") + + def __str__(self): + """Summarise the graph: node count, number of source nodes that own outgoing edges (summed over relationship types), and the object hash.""" + return ("Graph(nodes={0}, edge_sources={1}, hash={2})").format(len(self._nodes.keys()), + sum(len(x) for x in self._edges.values()), hash(self)) + + +class NodeInterface(object): + """Abstract base class for all Node Objects - All nodes must have an id and name + """ + __metaclass__ = abc.ABCMeta + + @abc.abstractproperty + def id(self): + raise NotImplementedError() + + @abc.abstractproperty + def name(self): + raise NotImplementedError() + + +class NodeLib(NodeInterface): + """NodeLib class which represents a library within the graph + """ + def __init__(self, id, name, input=None): + """Build a library node; a dict `input` is copied onto the instance attribute by attribute (lists become sets), otherwise empty fields are created""" + if isinstance(input, dict): + should_fail = False + for k, v in input.iteritems(): + try: + if isinstance(v, list): + setattr(self, k, set(v)) + else: + setattr(self, k, v) + except AttributeError as e: + # logging interpolates its arguments with %-style placeholders, not str.format fields + logging.error("found something bad, %s, %s", e, type(e)) + should_fail = True + if should_fail: + raise Exception("Problem setting attribute for NodeLib") + else: + self._id = id + self.type = graph_consts.NODE_LIB + self._name = name + self._defined_symbols = set() + self._defined_files = set() + self._dependent_files = set() + self._dependent_libs = set() + + @property + def id(self): + return self._id + + @property + def name(self): + return self._name + + @property + def defined_symbols(self): + return self._defined_symbols + + @defined_symbols.setter + def defined_symbols(self, value): + if isinstance(value, set): + 
self._defined_symbols = value + else: + raise TypeError("NodeLib.defined_symbols must be a set") + + @property + def defined_files(self): + return self._defined_files + + @defined_files.setter + def defined_files(self, value): + if isinstance(value, set): + self._defined_files = value + else: + raise TypeError("NodeLib.defined_files must be a set") + + @property + def dependent_files(self): + return self._dependent_files + + @dependent_files.setter + def dependent_files(self, value): + if isinstance(value, set): + self._dependent_files = value + else: + raise TypeError("NodeLib.dependent_files must be a set") + + @property + def dependent_libs(self): + return self._dependent_libs + + @dependent_libs.setter + def dependent_libs(self, value): + """Replace the set of dependent libraries; raises TypeError unless value is a set""" + if isinstance(value, set): + # Write the backing field that the dependent_libs getter, add_dependent_lib and __eq__ read + self._dependent_libs = value + else: + raise TypeError("NodeLib.dependent_libs must be a set") + + def add_defined_symbol(self, symbol): + if symbol is not None: + self._defined_symbols.add(symbol) + + def add_defined_file(self, file): + if file is not None: + self._defined_files.add(file) + + def add_dependent_file(self, file): + if file is not None: + self._dependent_files.add(file) + + def add_dependent_lib(self, lib): + if lib is not None: + self._dependent_libs.add(lib) + + def add_incoming_edges(self, from_node, g): + """Whenever you generate a LIB_LIB edge, you must add + the source lib to the dependent_lib field in the target lib + """ + if from_node.type == graph_consts.NODE_LIB: + self.add_dependent_lib(from_node.id) + + def __eq__(self, other): + if isinstance(other, NodeLib): + return (self._id == other._id and self._defined_symbols == other._defined_symbols and + self._defined_files == other._defined_files and + self._dependent_libs == other._dependent_libs and + self._dependent_files == other._dependent_files) + + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return self.id + + +class NodeSymbol(NodeInterface): + """NodeSymbol 
class which represents a symbol within the dependency graph + """ + + def __init__(self, id, name, input=None): + """Build a symbol node; a dict `input` is copied onto the instance attribute by attribute (lists become sets), otherwise empty fields are created""" + if isinstance(input, dict): + should_fail = False + + for k, v in input.iteritems(): + try: + if isinstance(v, list): + setattr(self, k, set(v)) + else: + setattr(self, k, v) + except AttributeError as e: + # logging interpolates its arguments with %-style placeholders, not str.format fields + logging.error("found something bad, %s, %s", e, type(e)) + should_fail = True + + if should_fail: + raise Exception("Problem setting attribute for NodeSymbol") + else: + self._id = id + self.type = graph_consts.NODE_SYM + self._name = name + self._dependent_libs = set() + self._dependent_files = set() + self._libs = set() + self._files = set() + + @property + def id(self): + return self._id + + @property + def name(self): + return self._name + + @property + def libs(self): + return self._libs + + @libs.setter + def libs(self, value): + if isinstance(value, set): + self._libs = value + else: + raise TypeError("NodeSymbol.libs must be a set") + + @property + def files(self): + return self._files + + @files.setter + def files(self, value): + if isinstance(value, set): + self._files = value + else: + raise TypeError("NodeSymbol.files must be a set") + + @property + def dependent_libs(self): + return self._dependent_libs + + @dependent_libs.setter + def dependent_libs(self, value): + if isinstance(value, set): + self._dependent_libs = value + else: + raise TypeError("NodeSymbol.dependent_libs must be a set") + + @property + def dependent_files(self): + return self._dependent_files + + @dependent_files.setter + def dependent_files(self, value): + if isinstance(value, set): + self._dependent_files = value + else: + raise TypeError("NodeSymbol.dependent_files must be a set") + + def add_library(self, library): + if library is not None: + self._libs.add(library) + + def add_file(self, file): + if file is not None: + self._files.add(file) + + def add_dependent_file(self, file): + if file is not None: + self._dependent_files.add(file) + + def 
add_dependent_lib(self, library): + if library is not None: + self._dependent_libs.add(library) + + def add_incoming_edges(self, from_node, g): + if from_node.type == graph_consts.NODE_FILE: + if from_node.library not in self.libs: + self.add_dependent_lib(from_node.library) + + self.add_dependent_file(from_node.id) + + lib_node = g.get_node(from_node.library) + + if lib_node is not None and from_node.library not in self.libs: + g.add_edge(graph_consts.LIB_SYM, lib_node.id, self.id) + + def __eq__(self, other): + if isinstance(other, NodeSymbol): + return (self.id == other.id and self._libs == other._libs and + self._files == other._files and + self._dependent_libs == other._dependent_libs and + self._dependent_files == other._dependent_files + ) + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return self.id + + +class NodeFile(NodeInterface): + """NodeFile class which represents an object file within the build dependency graph + """ + + def __init__(self, id, name, input=None): + """Build a file node; a dict `input` is copied onto the instance attribute by attribute (lists become sets), otherwise empty fields are created""" + if isinstance(input, dict): + should_fail = False + for k, v in input.iteritems(): + try: + if isinstance(v, list): + setattr(self, k, set(v)) + else: + setattr(self, k, v) + except AttributeError as e: + # logging interpolates its arguments with %-style placeholders, not str.format fields + logging.error("found something bad, %s, %s", e, type(e)) + should_fail = True + if should_fail: + raise Exception("Problem setting attribute for NodeFile") + else: + self._id = id + self.type = graph_consts.NODE_FILE + self._name = name + self._defined_symbols = set() + self._dependent_libs = set() + self._dependent_files = set() + self._lib = None + + @property + def id(self): + return self._id + + @property + def name(self): + return self._name + + @property + def defined_symbols(self): + return self._defined_symbols + + @defined_symbols.setter + def defined_symbols(self, value): + if isinstance(value, set): + self._defined_symbols = value + else: + raise TypeError("NodeFile.defined_symbols must be a set") + + @property + 
def dependent_libs(self): + return self._dependent_libs + + @dependent_libs.setter + def dependent_libs(self, value): + if isinstance(value, set): + self._dependent_libs = value + else: + raise TypeError("NodeFile.dependent_libs must be a set") + + @property + def dependent_files(self): + return self._dependent_files + + @dependent_files.setter + def dependent_files(self, value): + if isinstance(value, set): + self._dependent_files = value + else: + raise TypeError("NodeFile.dependent_files must be a set") + + @property + def library(self): + return self._lib + + @library.setter + def library(self, library): + if library is not None: + self._lib = library + + def add_defined_symbol(self, symbol): + if symbol is not None: + self._defined_symbols.add(symbol) + + def add_dependent_file(self, file): + if file is not None: + self._dependent_files.add(file) + + def add_dependent_lib(self, library): + if library is not None: + self._dependent_libs.add(library) + + def add_incoming_edges(self, from_node, g): + if from_node.type == graph_consts.NODE_FILE: + self.add_dependent_file(from_node.id) + lib_node = g.get_node(self.library) + + if from_node.library is not None and from_node.library != self.library: + self.add_dependent_lib(from_node.library) + g.add_edge(graph_consts.LIB_FIL, from_node.library, self.id) + if lib_node is not None: + lib_node.add_dependent_file(from_node.id) + lib_node.add_dependent_lib(from_node.library) + g.add_edge(graph_consts.FIL_LIB, from_node.id, lib_node.id) + + def __eq__(self, other): + """Two NodeFile objects are equal when their id, owning library, dependent libs/files and defined symbols all match; any other type compares unequal""" + # Compare against NodeFile: the fields read below (_lib, _defined_symbols) exist only on file nodes + if isinstance(other, NodeFile): + return (self.id == other.id and self._lib == other._lib and + self._dependent_libs == other._dependent_libs and + self._dependent_files == other._dependent_files and + self._defined_symbols == other._defined_symbols) + + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return self.id + + +class NodeExe(NodeInterface): + def __init__(self, id, name, input=None): + if 
isinstance(input, dict): + should_fail = False + for k, v in input.iteritems(): + try: + if isinstance(v, list): + setattr(self, k, set(v)) + else: + setattr(self, k, v) + except AttributeError as e: + # logging interpolates its arguments with %-style placeholders, not str.format fields + logging.error("found something bad, %s, %s", e, type(e)) + should_fail = True + if should_fail: + raise Exception("Problem setting attribute for NodeExe") + else: + self._id = id + self.type = graph_consts.NODE_EXE + self._name = name + self.contained_files = set() + + @property + def id(self): + return self._id + + @property + def name(self): + return self._name + + def __repr__(self): + return self.id + + +types = {graph_consts.NODE_LIB: NodeLib, + graph_consts.NODE_SYM: NodeSymbol, + graph_consts.NODE_FILE: NodeFile, + graph_consts.NODE_EXE: NodeExe,} + + +def node_factory(id, nodetype, dict_source=None): + if isinstance(dict_source, dict): + return types[nodetype](id, id, input=dict_source) + else: + return types[nodetype](id, id) diff --git a/dagger/graph_consts.py b/dagger/graph_consts.py new file mode 100644 index 0000000..81fe86d --- /dev/null +++ b/dagger/graph_consts.py @@ -0,0 +1,26 @@ +"""Constants for use in graph.py and dagger.py""" + +"""Relationship edge types""" +LIB_LIB = 1 +LIB_FIL = 2 +FIL_LIB = 3 +FIL_FIL = 4 +FIL_SYM = 5 +LIB_SYM = 6 +IMP_LIB_LIB = 7 +EXE_LIB = 8 + + +"""NodeTypes""" +NODE_LIB = 1 +NODE_SYM = 2 +NODE_FILE = 3 +NODE_EXE = 4 + +RELATIONSHIP_TYPES = range(1, 9) +NODE_TYPES = range(1, 5) + + +"""Error/query codes""" +NODE_NOT_FOUND = 1 + diff --git a/dagger/operations/hello_world.py b/dagger/operations/hello_world.py deleted file mode 100644 index d5d8cbf..0000000 --- a/dagger/operations/hello_world.py +++ /dev/null @@ -1,8 +0,0 @@ -import argh - - -@argh.named("hello") -def main(): - """A simple 'hello world' example.""" - - return "hello world!" 
diff --git a/dagger/operations/repl.py b/dagger/operations/repl.py new file mode 100644 index 0000000..ce19462 --- /dev/null +++ b/dagger/operations/repl.py @@ -0,0 +1,285 @@ +import os +import sys +import cmd +import argh +import logging + +import dagger.repl.query_engine +import dagger.graph +import dagger.graph_consts + + +def print_incoming_graph(g, edge_type): + if g is None: + return + + for dep in sorted(g.get_edge_type(edge_type).keys()): + print(dep) + +def print_outgoing_graph(g, source, edge_type): + """Prints, in sorted order, the ids that source points to under edge_type; prints nothing when g is None or source has no such edges""" + if g is None: + return + + # .get() without a default yields None for a source with no outgoing edges, which sorted() cannot iterate + for dep in sorted(g.get_edge_type(edge_type).get(source, ())): + print(dep) + +def print_node_set(nodes): + if nodes is None: + return + + for node in sorted(nodes): + print(node.id) + +def print_node(node): + for attr, value in node.__dict__.iteritems(): + print(str(attr) + ":" + "\n" + str(value)) + +def print_paths(paths): + """Prints each path as 'a -> b -> c' followed by a separator line; prints nothing when paths is None""" + # The query engine returns None rather than an empty list when there is nothing to report + if paths is None: + return + + for path in paths: + print(" -> ".join(path)) + print("-------------------") + +class QueryRepl(cmd.Cmd): + prompt = '>> ' + + def import_graph(self, path): + """Imports the dagger.graph, and calls cmdloop to start the repl""" + + print("Importing graph into query engine from JSON") + self.g = dagger.graph.Graph(path) + print("Successfully imported graph, start querying! 
" + "Type 'help' for a list of query commands") + self.cmdloop() + + def do_get_flattened_exp_deps(self, line): + """Gets the flattened list of explicit library dependencies""" + if self.g.get_node(line) is None: + print("Node ID not valid") + return + print_node_set(dagger.repl.query_engine.get_flattened_deps(self.g, line, + dagger.graph_consts.LIB_LIB)) + + def do_get_flattened_imp_deps(self, line): + """Gets the flattened list of implicit library dependencies""" + if self.g.get_node(line) is None: + print("Node ID not valid") + return + print_node_set(dagger.repl.query_engine.get_flattened_deps(self.g, line, + dagger.graph_consts.IMP_LIB_LIB)) + + def do_get_link_paths(self, line): + """Usage: get_link_paths libA libB , Gets the link paths from LibA to LibB + or an executable to a library""" + # Unpacking a wrong number of arguments would raise and terminate the repl + args = line.split() + if len(args) != 2: + print("Usage: get_link_paths libA libB") + return + source, target = args + print_paths(dagger.repl.query_engine.get_link_paths(self.g, source, target)) + + def do_get_node(self, line): + """Prints all information about a node object""" + node = self.g.get_node(line) + if node is None: + print("Node ID not valid") + return + + print_node(node) + + def do_symbol_leaks(self, line): + """Returns the symbols that are needed by this library, but not defined + within the library or any of its explicit dependencies""" + + node = self.g.get_node(line) + if node is None: + print("Node ID does not exist") + return + + # find_symbol_leaks reads defined_symbols and the LIB_LIB/LIB_SYM edges, which only apply to libraries + if node.type != dagger.graph_consts.NODE_LIB: + print("Node is not a valid type for this query") + return + + print_node_set(dagger.repl.query_engine.find_symbol_leaks(self.g, line)) + + def do_get_dependent_libs(self, line): + """Gets all the libraries dependent on this symbol, file, or library""" + + node = self.g.get_node(line) + if node is None: + print("Node ID does not exist") + return + + if node.type == dagger.graph_consts.NODE_LIB: + edge_type = dagger.graph_consts.LIB_LIB + + elif node.type == dagger.graph_consts.NODE_SYM: + edge_type = dagger.graph_consts.LIB_SYM + + else: + edge_type = dagger.graph_consts.LIB_FIL + + print_incoming_graph( + dagger.repl.query_engine.get_dependent_libs( + self.g, line), edge_type) + + def do_get_dependent_files(self, line): + """Gets all files dependent on this symbol, file, or library""" 
+ + node = self.g.get_node(line) + + if node is None: + print("Node ID does not exist") + return + + if node.type == dagger.graph_consts.NODE_LIB: + edge_type = dagger.graph_consts.FIL_LIB + + elif node.type == dagger.graph_consts.NODE_SYM: + edge_type = dagger.graph_consts.FIL_SYM + + else: + edge_type = dagger.graph_consts.FIL_FIL + + print_incoming_graph( + dagger.repl.query_engine.get_dependent_files( + self.g, line), edge_type) + + def do_get_explicit_lib_deps(self, line): + """Gets this libraries explicitly defined dependencies""" + + node = self.g.get_node(line) + + if node is None: + print("Node ID does not exist") + return + + if node.type not in (dagger.graph_consts.NODE_LIB, dagger.graph_consts.NODE_EXE): + print("Node is not a valid type for this query") + return + + if node.type == dagger.graph_consts.NODE_LIB: + edge_type = dagger.graph_consts.LIB_LIB + else: + edge_type = dagger.graph_consts.EXE_LIB + + print_outgoing_graph(dagger.repl.query_engine.get_explicit_lib_deps(self.g, line), + line, edge_type) + + def do_get_implicit_lib_deps(self, line): + """Gets all implicit library dependencies for this library""" + + node = self.g.get_node(line) + + if node is None: + print("Node ID does not exist") + return + + if node.type != dagger.graph_consts.NODE_LIB: + print("Node is not a valid type for this query") + return + + print_outgoing_graph(dagger.repl.query_engine.get_implicit_lib_deps(self.g, line), line, dagger.graph_consts.IMP_LIB_LIB) + + def do_get_symbol_deps(self, line): + """Gets all symbol dependencies for this library or file""" + + node = self.g.get_node(line) + + if node is None: + print("Not a valid Node ID") + return + + if node.type == dagger.graph_consts.NODE_LIB: + edge_type = dagger.graph_consts.LIB_SYM + elif node.type == dagger.graph_consts.NODE_FILE: + edge_type = dagger.graph_consts.FIL_SYM + else: + print("Node is not a valid type for this query") + return + + print_outgoing_graph( + dagger.repl.query_engine.get_symbol_deps( + 
self.g, line), line, edge_type) + + def do_get_defined_symbols(self, line): + """Gets all symbols defined in this library or file""" + + node = self.g.get_node(line) + + if node is None: + print("Not a valid Node ID") + return + + if node.type not in (dagger.graph_consts.NODE_LIB, dagger.graph_consts.NODE_FILE): + print("Node is not of valid type for this operation") + return + + print_node_set(dagger.repl.query_engine.get_defined_symbols(self.g, line)) + + def do_get_defined_files(self, line): + """Gets all the files defined within this library""" + + node = self.g.get_node(line) + + if node is None: + print("Not a valid Node ID") + return + + if node.type not in (dagger.graph_consts.NODE_LIB, dagger.graph_consts.NODE_FILE): + print("Node is not of valid type for this operation") + return + + print_node_set(dagger.repl.query_engine.get_defined_files(self.g, line)) + + def do_get_file(self, line): + """Gets the file/files this symbol is defined in""" + + node = self.g.get_node(line) + if node is None: + print("Not a valid Node ID") + return + nodes = dagger.repl.query_engine.get_file(self.g, line) + if nodes is None: + return + print_node_set(nodes) + + def do_get_lib(self, line): + """Gets the lib/libs this symbol is defined in""" + + nodes = dagger.repl.query_engine.get_lib(self.g, line) + + if nodes is None: + return + + print_node_set(nodes) + + def do_get_file_deps(self, line): + """Gets the files this file/library depends on""" + + node = self.g.get_node(line) + if node is None: + print("Not a valid Node ID") + return + + if node.type == dagger.graph_consts.NODE_LIB: + edge_type = dagger.graph_consts.LIB_FIL + elif node.type == dagger.graph_consts.NODE_FILE: + edge_type = dagger.graph_consts.FIL_FIL + else: + print("Not a valid node for this query") + return + + print_outgoing_graph( + dagger.repl.query_engine.get_file_deps( + self.g, line), line, edge_type) + + def do_get_lib_deps_f(self, line): + """Gets the libraries this file depends on""" + + 
node = self.g.get_node(line) + + if node is None: + print("Not a valid Node ID") + return + + # get_lib_deps_f raises TypeError for anything but a file node, which would end the repl + if node.type != dagger.graph_consts.NODE_FILE: + print("Node is not a valid type for this query") + return + + print_outgoing_graph(dagger.repl.query_engine.get_lib_deps_f(self.g, line), line, dagger.graph_consts.FIL_LIB) + + def do_detect_cycles(self, line): + """Returns the cycles found in this dagger.graph""" + + print_paths(dagger.repl.query_engine.detect_cycles(self.g)) + + def do_exit(self, line): + "Exits the repl" + + print("Quitting the query engine - Goodbye!") + raise SystemExit() + + +@argh.arg("path", help="path to file", default="foo.json") +@argh.expects_obj +@argh.named("repl") +def main(args): + """Starts the query repl on the graph file at args.path, logging an error if the file is malformed or cannot be opened""" + try: + QueryRepl().import_graph(args.path) + except ValueError: + logging.error("Your json data is malformed, do a rebuild") + except IOError: + # logging needs a %-style placeholder to interpolate the extra argument + logging.error("cannot open %s", args.path) diff --git a/dagger/operations/viz.py b/dagger/operations/viz.py index f3e3510..808ea23 100644 --- a/dagger/operations/viz.py +++ b/dagger/operations/viz.py @@ -6,7 +6,7 @@ @argh.named("viz") def main(): - t = threading.Thread(target=dagger.visualizations.server.main) + t = threading.Thread(target=dagger.visualization.server.main) t.daemon = True t.start() url = "127.0.0.1:5000/interactive" diff --git a/dagger/repl/__init__.py b/dagger/repl/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dagger/repl/query_engine.py b/dagger/repl/query_engine.py new file mode 100644 index 0000000..2086f72 --- /dev/null +++ b/dagger/repl/query_engine.py @@ -0,0 +1,389 @@ +import dagger.graph +import dagger.graph_consts +import copy + + +def get_flattened_deps(g, library, edge_type): + """Returns all the dependencies which are explicitly satisfied""" + + lib_node = g.get_node(library) + if lib_node is None: + return None + deps = set() + libraries = set([library]) + visited = set([library]) + + while True: + if len(libraries) == 0: + break + + new_libs = set() + + for library in libraries: + if edge_type == dagger.graph_consts.LIB_LIB: + children = g.get_edge_type(dagger.graph_consts.LIB_LIB).get(library) + else: + implicit_dep_graph = get_implicit_lib_deps(g, library) + if 
implicit_dep_graph is None: + continue + children = implicit_dep_graph.get_edge_type(dagger.graph_consts.IMP_LIB_LIB).get(library) + + if children is None: + continue + + deps.update([g.get_node(child) for child in children]) + new_libs = new_libs | (children - visited) + + # Record the next frontier as seen; otherwise a cycle that does not pass through the starting library is re-queued forever + visited = visited | new_libs + libraries = new_libs + + return deps + +def get_extra_links(g, library): + pass + +def get_link_paths(g, source, target): + """Returns a list of paths (each a list of node ids) from source to target, found via recursive dfs. + Returns None when either node is unknown, when source is neither a library nor an executable, or when source has no direct dependencies""" + source_node = g.get_node(source) + target_node = g.get_node(target) + + if None in (source_node, target_node): + return None + + if source_node.type == dagger.graph_consts.NODE_LIB: + direct_deps = g.get_edge_type(dagger.graph_consts.LIB_LIB).get(source) + elif source_node.type == dagger.graph_consts.NODE_EXE: + direct_deps = g.get_edge_type(dagger.graph_consts.EXE_LIB).get(source) + else: + # Only libraries and executables have link edges to follow + return None + + paths = [] + + # .get() yields None when the source has no outgoing edges at all + if not direct_deps: + return None + + for lib in direct_deps: + compute_link_paths(g, source, target, lib, paths, [source], set([source])) + + return paths + +def compute_link_paths(g, source, target, lib, paths, path, visited): + """Computes all the paths between two libraries, or an executable and a library""" + + direct_deps = g.get_edge_type(dagger.graph_consts.LIB_LIB).get(lib) + if lib == target: + path.append(target) + paths.append(path) + return + if lib in visited: + return + if direct_deps is None: + return + + visited.update([lib]) + path.append(lib) + for child in direct_deps: + compute_link_paths(g, source, target, child, paths, copy.deepcopy(path), visited) + + + +def find_symbol_leaks(g, library): + """Finds all the symbols that are required, but not defined within this library + or any of its direct explicit dependencies. Returns None when the library id is unknown""" + + lib_node = g.get_node(library) + if lib_node is None: + return None + explicit_dependencies = g.get_edge_type(dagger.graph_consts.LIB_LIB).get(library) + + # Copy first: the property hands back the node's own set, and this function extends it with the dependencies' symbols + symbols_defined = set(lib_node.defined_symbols) + + if explicit_dependencies is not None: + 
def get_lib(g, id):
    """Return the library node(s) that contain the given file or symbol.

    Args:
        g: graph object exposing ``get_node(node_id)``.
        id: id of a file or symbol node.

    Returns:
        A set of nodes: the node named by ``node.library`` for a file, or one
        node per entry of ``node.libs`` for a symbol (a multiply defined
        symbol can have more than one library). ``None`` when the id is
        unknown or the node is neither a file nor a symbol.
    """
    node = g.get_node(id)
    if node is None:
        return None

    # Compare node types by value; identity (`is`) only works by accident for
    # interned constants.
    if node.type == dagger.graph_consts.NODE_FILE:
        return {g.get_node(node.library)}
    if node.type == dagger.graph_consts.NODE_SYM:
        # `libs` may be None (other queries in this module check for that).
        return {g.get_node(lib_id) for lib_id in node.libs or ()}
    return None


def get_file(g, id):
    """Return the file node(s) in which the given symbol is defined.

    Args:
        g: graph object exposing ``get_node(node_id)``.
        id: id of a symbol node.

    Returns:
        A set with one node per entry of ``node.files`` (a multiply defined
        symbol can have more than one file). ``None`` when the id is unknown
        or the node is not a symbol.
    """
    node = g.get_node(id)
    if node is None:
        return None

    if node.type == dagger.graph_consts.NODE_SYM:
        # `files` may be None (other queries in this module check for that).
        return {g.get_node(file_id) for file_id in node.files or ()}
    return None


def get_dependent_libs(g, id):
    """Return the libraries which depend on the given symbol, lib, or file.

    Args:
        g: graph object exposing ``get_node(node_id)``.
        id: id of the node being depended upon.

    Returns:
        Whatever ``gen_incoming_subgraph`` produces for the node's
        ``dependent_libs`` field, or ``None`` when the id is unknown.
    """
    target_node = g.get_node(id)
    if target_node is None:
        return None

    # Pick the edge type that links a library to this kind of target; any
    # node that is neither a library nor a symbol is treated as a file.
    if target_node.type == dagger.graph_consts.NODE_LIB:
        edge_type = dagger.graph_consts.LIB_LIB
    elif target_node.type == dagger.graph_consts.NODE_SYM:
        edge_type = dagger.graph_consts.LIB_SYM
    else:
        edge_type = dagger.graph_consts.LIB_FIL

    return gen_incoming_subgraph(g, id, "dependent_libs", edge_type)


def find_node(g, id):
    """Return the node registered under ``id`` (the result of ``g.get_node``)."""
    return g.get_node(id)


def get_dependent_files(g, id):
    """Return the files which depend on the given symbol, lib, or file.

    Args:
        g: graph object exposing ``get_node(node_id)``.
        id: id of the node being depended upon.

    Returns:
        Whatever ``gen_incoming_subgraph`` produces for the node's
        ``dependent_files`` field, or ``None`` when the id is unknown.
    """
    target_node = g.get_node(id)
    if target_node is None:
        return None

    # Pick the edge type that links a file to this kind of target; any node
    # that is neither a library nor a symbol is treated as a file.
    if target_node.type == dagger.graph_consts.NODE_LIB:
        edge_type = dagger.graph_consts.FIL_LIB
    elif target_node.type == dagger.graph_consts.NODE_SYM:
        edge_type = dagger.graph_consts.FIL_SYM
    else:
        edge_type = dagger.graph_consts.FIL_FIL

    return gen_incoming_subgraph(g, id, "dependent_files", edge_type)


def get_defined_symbols(g, id):
    """Return the symbols which are defined in the given file or library.

    Args:
        g: graph object exposing ``get_node(node_id)``.
        id: id of a library or file node.

    Returns:
        A set of symbol nodes, ``None`` when the id is unknown, or the node
        itself when its ``defined_symbols`` is ``None``.

    Raises:
        TypeError: the node is neither a library nor a file.
    """
    node = g.get_node(id)
    if node is None:
        return None

    allowed_types = (dagger.graph_consts.NODE_LIB, dagger.graph_consts.NODE_FILE)
    if node.type not in allowed_types:
        raise TypeError(
            "expected a library or file node, got node type %r" % (node.type,))

    if node.defined_symbols is None:
        # NOTE(review): returns the node rather than an empty set; kept as-is
        # because callers are not visible from here -- confirm it is intended.
        return node
    return {g.get_node(symbol_id) for symbol_id in node.defined_symbols}


def get_defined_files(g, id):
    """Return the files which are defined in the given library.

    Args:
        g: graph object exposing ``get_node(node_id)``.
        id: id of a library node.

    Returns:
        A set of file nodes, ``None`` when the id is unknown, or the node
        itself when its ``defined_files`` is ``None``.

    Raises:
        TypeError: the node is not a library.
    """
    node = g.get_node(id)
    if node is None:
        return None

    if node.type != dagger.graph_consts.NODE_LIB:
        raise TypeError(
            "expected a library node, got node type %r" % (node.type,))

    if node.defined_files is None:
        # NOTE(review): returns the node rather than an empty set; kept as-is
        # because callers are not visible from here -- confirm it is intended.
        return node
    return {g.get_node(file_id) for file_id in node.defined_files}


def get_file_deps(g, id):
    """Return the files which the given lib or file depends on.

    Returns:
        The result of ``gen_outgoing_subgraph`` for the LIB_FIL (library) or
        FIL_FIL (file) edge type, or ``None`` when the id is unknown.

    Raises:
        TypeError: the node is neither a library nor a file.
    """
    node = g.get_node(id)
    if node is None:
        return None

    if node.type == dagger.graph_consts.NODE_LIB:
        edge_type = dagger.graph_consts.LIB_FIL
    elif node.type == dagger.graph_consts.NODE_FILE:
        edge_type = dagger.graph_consts.FIL_FIL
    else:
        raise TypeError(
            "expected a library or file node, got node type %r" % (node.type,))

    return gen_outgoing_subgraph(g, id, edge_type)


def get_symbol_deps(g, id):
    """Return the symbols which the given lib or file depends on.

    Returns:
        The result of ``gen_outgoing_subgraph`` for the LIB_SYM (library) or
        FIL_SYM (file) edge type, or ``None`` when the id is unknown.

    Raises:
        TypeError: the node is neither a library nor a file.
    """
    node = g.get_node(id)
    if node is None:
        return None

    if node.type == dagger.graph_consts.NODE_LIB:
        edge_type = dagger.graph_consts.LIB_SYM
    elif node.type == dagger.graph_consts.NODE_FILE:
        edge_type = dagger.graph_consts.FIL_SYM
    else:
        raise TypeError(
            "expected a library or file node, got node type %r" % (node.type,))

    return gen_outgoing_subgraph(g, id, edge_type)


def get_lib_deps_f(g, id):
    """Return the libraries that a file depends on.

    Returns:
        The result of ``gen_outgoing_subgraph`` for the FIL_LIB edge type, or
        ``None`` when the id is unknown.

    Raises:
        TypeError: the node is not a file.
    """
    node = g.get_node(id)
    if node is None:
        return None

    if node.type != dagger.graph_consts.NODE_FILE:
        raise TypeError(
            "expected a file node, got node type %r" % (node.type,))

    return gen_outgoing_subgraph(g, id, dagger.graph_consts.FIL_LIB)


def get_explicit_lib_deps(g, id):
    """Return the libraries that a library or executable explicitly depends on.

    Returns:
        The result of ``gen_outgoing_subgraph`` for the LIB_LIB (library) or
        EXE_LIB (executable) edge type, or ``None`` when the id is unknown.

    Raises:
        TypeError: the node is neither a library nor an executable.
    """
    node = g.get_node(id)
    if node is None:
        return None

    if node.type == dagger.graph_consts.NODE_LIB:
        edge_type = dagger.graph_consts.LIB_LIB
    elif node.type == dagger.graph_consts.NODE_EXE:
        edge_type = dagger.graph_consts.EXE_LIB
    else:
        raise TypeError(
            "expected a library or executable node, got node type %r"
            % (node.type,))

    return gen_outgoing_subgraph(g, id, edge_type)


# TODO populate these in the SCons tool under the edgetype dagger.graph_consts.IMP_LIB_LIB
def get_implicit_lib_deps(g, id):
    """Return the libraries that a library or executable implicitly depends on.

    The implicit dependencies are derived here rather than read from the
    graph: every symbol on the node's LIB_SYM edges contributes the libraries
    listed in that symbol's ``libs``, excluding the node itself.

    Returns:
        A new ``dagger.graph.Graph`` holding the source node, the dependency
        nodes, and one IMP_LIB_LIB edge set from ``id`` to the dependency ids.
        ``None`` when the id is unknown, the node has no LIB_SYM edges, or no
        dependency was found.

    Raises:
        TypeError: the node is neither a library nor an executable.
    """
    source_node = g.get_node(id)
    if source_node is None:
        return None

    allowed_types = (dagger.graph_consts.NODE_LIB, dagger.graph_consts.NODE_EXE)
    if source_node.type not in allowed_types:
        raise TypeError(
            "expected a library or executable node, got node type %r"
            % (source_node.type,))

    # NOTE(review): executables are looked up under LIB_SYM as well, exactly
    # as before -- confirm that is where their symbol edges are stored.
    symbols = g.get_edge_type(dagger.graph_consts.LIB_SYM).get(id)
    if symbols is None:
        return None

    deps = set()
    for symbol in symbols:
        symbol_node = g.get_node(symbol)
        if symbol_node is None or symbol_node.libs is None:
            continue
        deps.update(
            lib for lib in symbol_node.libs if lib is not None and lib != id)

    if not deps:
        return None

    # need intermediate variable because of the way graph.nodes' setter works
    nodes = zip_nodes(g, deps)
    nodes[id] = source_node

    sub_graph = dagger.graph.Graph()
    sub_graph.nodes = nodes
    sub_graph.get_edge_type(dagger.graph_consts.IMP_LIB_LIB)[id] = deps
    return sub_graph


# TODO test and fix this function
def detect_cycles(g):
    """Return a list of tuples, each tuple being a cycle found among libraries.

    Only paths reported by ``detect_lib_cycle`` whose first and last element
    are the same library are kept.
    """
    # Snapshot the library ids up front so the scan does not depend on the
    # node mapping staying untouched while queries run.
    lib_ids = [node_id for node_id in g._nodes
               if g.get_node(node_id).type == dagger.graph_consts.NODE_LIB]

    cycles = set()
    for lib in lib_ids:
        # A library that appears among its own flattened implicit
        # dependencies takes part in a cycle.
        if any(dep.id == lib for dep in get_flattened_imp_deps(g, lib)):
            # Parenthesised form prints the same under Python 2 and parses
            # under Python 3.
            print(lib)
            cycle = detect_lib_cycle(g, lib, set(), ())
            if cycle is not None:
                cycles.add(cycle)

    return [cycle for cycle in cycles if cycle[0] == cycle[-1]]


def detect_lib_cycle(g, lib, visited, path):
    """Depth-first search for a repeated library along implicit dependencies.

    Args:
        g: graph to search.
        lib: id of the library currently being expanded.
        visited: set of library ids already on the current path; it is
            updated in place with ``lib``.
        path: tuple of library ids walked so far.

    Returns:
        ``path + (lib,)`` as soon as ``lib`` was already visited, otherwise
        the first such tuple found below ``lib``, or ``None`` if there is
        none.
    """
    if lib in visited:
        return path + (lib,)

    visited.add(lib)
    implicit_dep_graph = get_implicit_lib_deps(g, lib)
    if implicit_dep_graph is None:
        return None

    deps = implicit_dep_graph.get_edge_type(
        dagger.graph_consts.IMP_LIB_LIB)[lib]

    for dep in deps:
        # Each branch gets its own copy so siblings do not see each other's
        # visited libraries.
        cycle = detect_lib_cycle(g, dep, visited.copy(), path + (lib,))
        if cycle is not None:
            return cycle
    return None


def gen_outgoing_subgraph(g, source, edge_type):
    """Generate a subgraph for queries about direct edge relationships such
    as FIL_LIB, LIB_LIB, LIB_SYM etc.

    Args:
        g: graph to query.
        source: id of the node the edges start from.
        edge_type: edge type constant selecting which edges to follow.

    Returns:
        ``dagger.graph_consts.NODE_NOT_FOUND`` when ``source`` is unknown,
        ``None`` when ``source`` has no edges of ``edge_type``, otherwise a
        new ``dagger.graph.Graph`` with the source node, its targets, and the
        copied edge set.
    """
    source_node = g.get_node(source)
    if source_node is None:
        return dagger.graph_consts.NODE_NOT_FOUND

    dep_set = g.get_edge_type(edge_type).get(source)
    if dep_set is None:
        return None

    # need intermediate variable because of the way graph.nodes' setter works
    nodes = zip_nodes(g, dep_set)
    nodes[source] = source_node

    sub_graph = dagger.graph.Graph()
    sub_graph.nodes = nodes
    sub_graph.get_edge_type(edge_type)[source] = set(dep_set)
    return sub_graph


def gen_incoming_subgraph(g, target, field, edge_type):
    """Generate a subgraph for queries about incoming edge relationships,
    such as which libraries depend on the given target file or library.

    Args:
        g: graph to query.
        target: id of the node being depended upon.
        field: ``"dependent_libs"`` or ``"dependent_files"``; names the
            attribute of the target node that lists its dependents.
        edge_type: edge type constant used for the generated edges.

    Returns:
        ``dagger.graph_consts.NODE_NOT_FOUND`` when ``target`` is unknown,
        ``None`` when the selected attribute is ``None``, otherwise a new
        ``dagger.graph.Graph`` with one ``edge_type`` edge from every
        dependent to ``target``.

    Raises:
        ValueError: ``field`` is not one of the two supported names.
    """
    if field not in ("dependent_libs", "dependent_files"):
        raise ValueError(
            "field must be 'dependent_libs' or 'dependent_files', got %r"
            % (field,))

    target_node = g.get_node(target)
    if target_node is None:
        # Same convention as gen_outgoing_subgraph for an unknown node.
        return dagger.graph_consts.NODE_NOT_FOUND

    source_set = getattr(target_node, field)
    if source_set is None:
        # Maybe just return a single node object as a graph instead of None?
        return None

    nodes = zip_nodes(g, source_set)
    nodes[target] = target_node

    sub_graph = dagger.graph.Graph()
    sub_graph.nodes = nodes

    for source in source_set:
        sub_graph.get_edge_type(edge_type)[source] = set([target])

    return sub_graph


def zip_nodes(g, source_set):
    """Map every id in ``source_set`` to the node ``g.get_node`` returns for it."""
    return {node_id: g.get_node(node_id) for node_id in source_set}