Coverage for python/lum/clu/processors/directed_graph.py: 100%
13 statements
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-17 18:41 +0000
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-17 18:41 +0000
1from pydantic import BaseModel, Field
2import typing
4__all__ = ["DirectedGraph"]
9class Edge(BaseModel):
11 source: int = Field(description="0-based index of token serving as relation's source")
12 destination: int = Field(description="0-based index of token serving as relation's destination")
13 relation: str = Field(description="label for relation")
15class DirectedGraph(BaseModel):
17 STANFORD_BASIC_DEPENDENCIES: typing.ClassVar[str] = "stanford-basic"
18 STANFORD_COLLAPSED_DEPENDENCIES: typing.ClassVar[str] = "stanford-collapsed"
20 roots: list[int] = Field(description="Roots of the directed graph")
21 edges: list[Edge] = Field(description="the directed edges that comprise the graph")
23 """
24 Storage class for directed graphs.
27 Parameters
28 ----------
29 kind : str
30 The name of the directed graph.
32 deps : dict
33 A dictionary of {edges: [{source, destination, relation}], roots: [int]}
35 words : [str]
36 A list of the word form of the tokens from the originating `Sentence`.
38 Attributes
39 ----------
40 _words : [str]
41 A list of the word form of the tokens from the originating `Sentence`.
43 roots : [int]
44 A list of indices for the syntactic dependency graph's roots. Generally this is a single token index.
46 edges: list[lum.clu.processors.doc.Edge]
47 A list of `lum.clu.processors.doc.Edge`
49 incoming : A dictionary of {int -> [int]} encoding the incoming edges for each node in the graph.
51 outgoing : A dictionary of {int -> [int]} encoding the outgoing edges for each node in the graph.
53 labeled : [str]
54 A list of strings where each element in the list represents an edge encoded as source index, relation, and destination index ("source_relation_destination").
56 unlabeled : [str]
57 A list of strings where each element in the list represents an edge encoded as source index and destination index ("source_destination").
59 graph : networkx.Graph
60 A `networkx.graph` representation of the `DirectedGraph`. Used by `shortest_path`
62 Methods
63 -------
64 bag_of_labeled_dependencies_from_tokens(form)
65 Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.
66 bag_of_unlabeled_dependencies_from_tokens(form)
67 Produces a list of syntactic dependencies where each edge is left unlabeled without its grammatical relation.
68 """
70 # def __init__(self, kind, deps, words):
71 # NLPDatum.__init__(self)
72 # self._words = [w.lower() for w in words]
73 # self.kind = kind
74 # self.roots = deps.get("roots", [])
75 # self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]]
76 # self.incoming = self._build_incoming(self.edges)
77 # self.outgoing = self._build_outgoing(self.edges)
78 # self.labeled = self._build_labeled()
79 # self.unlabeled = self._build_unlabeled()
80 # self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False)
81 # self.undirected_graph = self.directed_graph.to_undirected()
83 # def __unicode__(self):
84 # return self.edges
86 # def __eq__(self, other):
87 # if isinstance(other, self.__class__):
88 # return self.to_JSON() == other.to_JSON()
89 # else:
90 # return False
92 # def __ne__(self, other):
93 # return not self.__eq__(other)
95 # def __hash__(self):
96 # return hash(self.to_JSON())
98 # def shortest_paths(self, start, end):
99 # """
100 # Find the shortest paths in the syntactic depedency graph
101 # between the provided start and end nodes.
103 # Parameters
104 # ----------
105 # start : int or [int]
106 # A single token index or list of token indices serving as the start of the graph traversal.
108 # end : int or [int]
109 # A single token index or list of token indices serving as the end of the graph traversal.
111 # See Also
112 # --------
113 # `processors.paths.DependencyUtils.shortest_path`
114 # """
115 # paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end)
116 # return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths]
118 # def shortest_path(self, start, end, scoring_func=lambda path: -len(path)):
119 # """
120 # Find the shortest path in the syntactic depedency graph
121 # between the provided start and end nodes.
123 # Parameters
124 # ----------
125 # start : int or [int]
126 # A single token index or list of token indices serving as the start of the graph traversal.
128 # end : int or [int]
129 # A single token index or list of token indices serving as the end of the graph traversal.
131 # scoring_func : function
132 # A function that scores each path in a list of [(source index, directed relation, destination index)] paths. Each path has the form [(source index, relation, destination index)].
133 # The path with the maximum score will be returned.
135 # See Also
136 # --------
137 # `processors.paths.DependencyUtils.shortest_path`
138 # """
139 # paths = self.shortest_paths(start, end)
140 # return None if not paths else max(paths, key=scoring_func)
142 # def degree_centrality(self):
143 # """
144 # Compute the degree centrality for nodes.
146 # See Also
147 # --------
148 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
149 # """
150 # return Counter(nx.degree_centrality(self.directed_graph))
152 # def in_degree_centrality(self):
153 # """
154 # Compute the in-degree centrality for nodes.
156 # See Also
157 # --------
158 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
159 # """
160 # return Counter(nx.in_degree_centrality(self.directed_graph))
162 # def out_degree_centrality(self):
163 # """
164 # Compute the out-degree centrality for nodes.
166 # See Also
167 # --------
168 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
169 # """
170 # return Counter(nx.out_degree_centrality(self.directed_graph))
172 # def pagerank(self,
173 # alpha=0.85,
174 # personalization=None,
175 # max_iter=1000,
176 # tol=1e-06,
177 # nstart=None,
178 # weight='weight',
179 # dangling=None,
180 # use_directed=True,
181 # reverse=True):
182 # """
183 # Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`). Use with `lum.clu.processors.doc.DirectedGraph.graph`.
184 # Note that by default, the directed graph is reversed in order to highlight predicate-argument nodes (refer to pagerank algorithm to understand why).
186 # See Also
187 # --------
188 # `processors.paths.DependencyUtils.pagerank`
189 # Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)
190 # """
191 # # check whether or not to reverse directed graph
192 # dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=True)
193 # # determine graph to use
194 # graph = dg if use_directed else self.undirected_graph
195 # return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)
197 # def _build_incoming(self, edges):
198 # dep_dict = defaultdict(list)
199 # for edge in edges:
200 # dep_dict[edge.destination].append((edge.source, edge.relation))
201 # return dep_dict
203 # def _build_outgoing(self, edges):
204 # dep_dict = defaultdict(list)
205 # for edge in edges:
206 # dep_dict[edge.source].append((edge.destination, edge.relation))
207 # return dep_dict
209 # def _build_labeled(self):
210 # labeled = []
211 # for out in self.outgoing:
212 # for (dest, rel) in self.outgoing[out]:
213 # labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest]))
214 # return labeled
216 # def _build_unlabeled(self):
217 # unlabeled = []
218 # for out in self.outgoing:
219 # for (dest, _) in self.outgoing[out]:
220 # unlabeled.append("{}_{}".format(self._words[out], self._words[dest]))
221 # return unlabeled
223 # def _graph_to_JSON_dict(self):
224 # dg_dict = dict()
225 # dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges]
226 # dg_dict["roots"] = self.roots
227 # return dg_dict
229 # def to_JSON_dict(self):
230 # return {self.kind:self._graph_to_JSON_dict()}