Coverage for python/lum/clu/processors/directed

1from pydantic import BaseModel, Field

2import typing

4__all__ = ["DirectedGraph"]

9class Edge(BaseModel):

11 source: int = Field(description="0-based index of token serving as relation's source")

12 destination: int = Field(description="0-based index of token serving as relation's destination")

13 relation: str = Field(description="label for relation")

15class DirectedGraph(BaseModel):

17 STANFORD_BASIC_DEPENDENCIES: typing.ClassVar[str] = "stanford-basic"

18 STANFORD_COLLAPSED_DEPENDENCIES: typing.ClassVar[str] = "stanford-collapsed"

20 roots: list[int] = Field(description="Roots of the directed graph")

21 edges: list[Edge] = Field(description="the directed edges that comprise the graph")

23 """

24 Storage class for directed graphs.

27 Parameters

28 ----------

29 kind : str

30 The name of the directed graph.

32 deps : dict

33 A dictionary of {edges: [{source, destination, relation}], roots: [int]}

35 words : [str]

36 A list of the word form of the tokens from the originating `Sentence`.

38 Attributes

39 ----------

40 _words : [str]

41 A list of the word form of the tokens from the originating `Sentence`.

43 roots : [int]

44 A list of indices for the syntactic dependency graph's roots. Generally this is a single token index.

46 edges: list[lum.clu.processors.doc.Edge]

47 A list of `lum.clu.processors.doc.Edge`

49 incoming : A dictionary of {int -> [int]} encoding the incoming edges for each node in the graph.

51 outgoing : A dictionary of {int -> [int]} encoding the outgoing edges for each node in the graph.

53 labeled : [str]

54 A list of strings where each element in the list represents an edge encoded as source index, relation, and destination index ("source_relation_destination").

56 unlabeled : [str]

57 A list of strings where each element in the list represents an edge encoded as source index and destination index ("source_destination").

59 graph : networkx.Graph

60 A `networkx.graph` representation of the `DirectedGraph`. Used by `shortest_path`

62 Methods

63 -------

64 bag_of_labeled_dependencies_from_tokens(form)

65 Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation.

66 bag_of_unlabeled_dependencies_from_tokens(form)

67 Produces a list of syntactic dependencies where each edge is left unlabeled without its grammatical relation.

68 """

70 # def __init__(self, kind, deps, words):

71 # NLPDatum.__init__(self)

72 # self._words = [w.lower() for w in words]

73 # self.kind = kind

74 # self.roots = deps.get("roots", [])

75 # self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]]

76 # self.incoming = self._build_incoming(self.edges)

77 # self.outgoing = self._build_outgoing(self.edges)

78 # self.labeled = self._build_labeled()

79 # self.unlabeled = self._build_unlabeled()

80 # self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False)

81 # self.undirected_graph = self.directed_graph.to_undirected()

83 # def __unicode__(self):

84 # return self.edges

86 # def __eq__(self, other):

87 # if isinstance(other, self.__class__):

88 # return self.to_JSON() == other.to_JSON()

89 # else:

90 # return False

92 # def __ne__(self, other):

93 # return not self.__eq__(other)

95 # def __hash__(self):

96 # return hash(self.to_JSON())

98 # def shortest_paths(self, start, end):

99 # """

100 # Find the shortest paths in the syntactic depedency graph

101 # between the provided start and end nodes.

102

103 # Parameters

104 # ----------

105 # start : int or [int]

106 # A single token index or list of token indices serving as the start of the graph traversal.

107

108 # end : int or [int]

109 # A single token index or list of token indices serving as the end of the graph traversal.

110

111 # See Also

112 # --------

113 # `processors.paths.DependencyUtils.shortest_path`

114 # """

115 # paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end)

116 # return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths]

117

118 # def shortest_path(self, start, end, scoring_func=lambda path: -len(path)):

119 # """

120 # Find the shortest path in the syntactic depedency graph

121 # between the provided start and end nodes.

122

123 # Parameters

124 # ----------

125 # start : int or [int]

126 # A single token index or list of token indices serving as the start of the graph traversal.

127

128 # end : int or [int]

129 # A single token index or list of token indices serving as the end of the graph traversal.

130

131 # scoring_func : function

132 # A function that scores each path in a list of [(source index, directed relation, destination index)] paths. Each path has the form [(source index, relation, destination index)].

133 # The path with the maximum score will be returned.

134

135 # See Also

136 # --------

137 # `processors.paths.DependencyUtils.shortest_path`

138 # """

139 # paths = self.shortest_paths(start, end)

140 # return None if not paths else max(paths, key=scoring_func)

141

142 # def degree_centrality(self):

143 # """

144 # Compute the degree centrality for nodes.

145

146 # See Also

147 # --------

148 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html

149 # """

150 # return Counter(nx.degree_centrality(self.directed_graph))

151

152 # def in_degree_centrality(self):

153 # """

154 # Compute the in-degree centrality for nodes.

155

156 # See Also

157 # --------

158 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html

159 # """

160 # return Counter(nx.in_degree_centrality(self.directed_graph))

161

162 # def out_degree_centrality(self):

163 # """

164 # Compute the out-degree centrality for nodes.

165

166 # See Also

167 # --------

168 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html

169 # """

170 # return Counter(nx.out_degree_centrality(self.directed_graph))

171

172 # def pagerank(self,

173 # alpha=0.85,

174 # personalization=None,

175 # max_iter=1000,

176 # tol=1e-06,

177 # nstart=None,

178 # weight='weight',

179 # dangling=None,

180 # use_directed=True,

181 # reverse=True):

182 # """

183 # Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`). Use with `lum.clu.processors.doc.DirectedGraph.graph`.

184 # Note that by default, the directed graph is reversed in order to highlight predicate-argument nodes (refer to pagerank algorithm to understand why).

185

186 # See Also

187 # --------

188 # `processors.paths.DependencyUtils.pagerank`

189 # Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank)

190 # """

191 # # check whether or not to reverse directed graph

192 # dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=True)

193 # # determine graph to use

194 # graph = dg if use_directed else self.undirected_graph

195 # return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling)

196

197 # def _build_incoming(self, edges):

198 # dep_dict = defaultdict(list)

199 # for edge in edges:

200 # dep_dict[edge.destination].append((edge.source, edge.relation))

201 # return dep_dict

202

203 # def _build_outgoing(self, edges):

204 # dep_dict = defaultdict(list)

205 # for edge in edges:

206 # dep_dict[edge.source].append((edge.destination, edge.relation))

207 # return dep_dict

208

209 # def _build_labeled(self):

210 # labeled = []

211 # for out in self.outgoing:

212 # for (dest, rel) in self.outgoing[out]:

213 # labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest]))

214 # return labeled

215

216 # def _build_unlabeled(self):

217 # unlabeled = []

218 # for out in self.outgoing:

219 # for (dest, _) in self.outgoing[out]:

220 # unlabeled.append("{}_{}".format(self._words[out], self._words[dest]))

221 # return unlabeled

222

223 # def _graph_to_JSON_dict(self):

224 # dg_dict = dict()

225 # dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges]

226 # dg_dict["roots"] = self.roots

227 # return dg_dict

228

229 # def to_JSON_dict(self):

230 # return {self.kind:self._graph_to_JSON_dict()}

Coverage for python/lum/clu/processors/directed_graph.py: 100%

13 statements