Coverage for python/lum/clu/processors/directed_graph.py: 100%

13 statements  

« prev     ^ index     » next       coverage.py v7.6.7, created at 2024-11-17 18:41 +0000

1from pydantic import BaseModel, Field 

2import typing 

3 

4__all__ = ["DirectedGraph"] 

5 

6 

7 

8 

9class Edge(BaseModel): 

10 

11 source: int = Field(description="0-based index of token serving as relation's source") 

12 destination: int = Field(description="0-based index of token serving as relation's destination") 

13 relation: str = Field(description="label for relation") 

14 

15class DirectedGraph(BaseModel): 

16 

17 STANFORD_BASIC_DEPENDENCIES: typing.ClassVar[str] = "stanford-basic" 

18 STANFORD_COLLAPSED_DEPENDENCIES: typing.ClassVar[str] = "stanford-collapsed" 

19 

20 roots: list[int] = Field(description="Roots of the directed graph") 

21 edges: list[Edge] = Field(description="the directed edges that comprise the graph") 

22 

23 """ 

24 Storage class for directed graphs. 

25 

26 

27 Parameters 

28 ---------- 

29 kind : str 

30 The name of the directed graph. 

31 

32 deps : dict 

33 A dictionary of {edges: [{source, destination, relation}], roots: [int]} 

34 

35 words : [str] 

36 A list of the word form of the tokens from the originating `Sentence`. 

37 

38 Attributes 

39 ---------- 

40 _words : [str] 

41 A list of the word form of the tokens from the originating `Sentence`. 

42 

43 roots : [int] 

44 A list of indices for the syntactic dependency graph's roots. Generally this is a single token index. 

45 

46 edges: list[lum.clu.processors.doc.Edge] 

47 A list of `lum.clu.processors.doc.Edge` 

48 

49 incoming : A dictionary of {int -> [int]} encoding the incoming edges for each node in the graph. 

50 

51 outgoing : A dictionary of {int -> [int]} encoding the outgoing edges for each node in the graph. 

52 

53 labeled : [str] 

54 A list of strings where each element in the list represents an edge encoded as source index, relation, and destination index ("source_relation_destination"). 

55 

56 unlabeled : [str] 

57 A list of strings where each element in the list represents an edge encoded as source index and destination index ("source_destination"). 

58 

59 graph : networkx.Graph 

60 A `networkx.graph` representation of the `DirectedGraph`. Used by `shortest_path` 

61 

62 Methods 

63 ------- 

64 bag_of_labeled_dependencies_from_tokens(form) 

65 Produces a list of syntactic dependencies where each edge is labeled with its grammatical relation. 

66 bag_of_unlabeled_dependencies_from_tokens(form) 

67 Produces a list of syntactic dependencies where each edge is left unlabeled without its grammatical relation. 

68 """ 

69 

70 # def __init__(self, kind, deps, words): 

71 # NLPDatum.__init__(self) 

72 # self._words = [w.lower() for w in words] 

73 # self.kind = kind 

74 # self.roots = deps.get("roots", []) 

75 # self.edges = [Edge(e["source"], e["destination"], e["relation"]) for e in deps["edges"]] 

76 # self.incoming = self._build_incoming(self.edges) 

77 # self.outgoing = self._build_outgoing(self.edges) 

78 # self.labeled = self._build_labeled() 

79 # self.unlabeled = self._build_unlabeled() 

80 # self.directed_graph = DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=False) 

81 # self.undirected_graph = self.directed_graph.to_undirected() 

82 

83 # def __unicode__(self): 

84 # return self.edges 

85 

86 # def __eq__(self, other): 

87 # if isinstance(other, self.__class__): 

88 # return self.to_JSON() == other.to_JSON() 

89 # else: 

90 # return False 

91 

92 # def __ne__(self, other): 

93 # return not self.__eq__(other) 

94 

95 # def __hash__(self): 

96 # return hash(self.to_JSON()) 

97 

98 # def shortest_paths(self, start, end): 

99 # """ 

100 # Find the shortest paths in the syntactic depedency graph 

101 # between the provided start and end nodes. 

102 

103 # Parameters 

104 # ---------- 

105 # start : int or [int] 

106 # A single token index or list of token indices serving as the start of the graph traversal. 

107 

108 # end : int or [int] 

109 # A single token index or list of token indices serving as the end of the graph traversal. 

110 

111 # See Also 

112 # -------- 

113 # `processors.paths.DependencyUtils.shortest_path` 

114 # """ 

115 # paths = DependencyUtils.shortest_paths(self.undirected_graph, start, end) 

116 # return None if not paths else [DependencyUtils.retrieve_edges(self, path) for path in paths] 

117 

118 # def shortest_path(self, start, end, scoring_func=lambda path: -len(path)): 

119 # """ 

120 # Find the shortest path in the syntactic depedency graph 

121 # between the provided start and end nodes. 

122 

123 # Parameters 

124 # ---------- 

125 # start : int or [int] 

126 # A single token index or list of token indices serving as the start of the graph traversal. 

127 

128 # end : int or [int] 

129 # A single token index or list of token indices serving as the end of the graph traversal. 

130 

131 # scoring_func : function 

132 # A function that scores each path in a list of [(source index, directed relation, destination index)] paths. Each path has the form [(source index, relation, destination index)]. 

133 # The path with the maximum score will be returned. 

134 

135 # See Also 

136 # -------- 

137 # `processors.paths.DependencyUtils.shortest_path` 

138 # """ 

139 # paths = self.shortest_paths(start, end) 

140 # return None if not paths else max(paths, key=scoring_func) 

141 

142 # def degree_centrality(self): 

143 # """ 

144 # Compute the degree centrality for nodes. 

145 

146 # See Also 

147 # -------- 

148 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html 

149 # """ 

150 # return Counter(nx.degree_centrality(self.directed_graph)) 

151 

152 # def in_degree_centrality(self): 

153 # """ 

154 # Compute the in-degree centrality for nodes. 

155 

156 # See Also 

157 # -------- 

158 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html 

159 # """ 

160 # return Counter(nx.in_degree_centrality(self.directed_graph)) 

161 

162 # def out_degree_centrality(self): 

163 # """ 

164 # Compute the out-degree centrality for nodes. 

165 

166 # See Also 

167 # -------- 

168 # https://networkx.github.io/documentation/development/reference/algorithms.centrality.html 

169 # """ 

170 # return Counter(nx.out_degree_centrality(self.directed_graph)) 

171 

172 # def pagerank(self, 

173 # alpha=0.85, 

174 # personalization=None, 

175 # max_iter=1000, 

176 # tol=1e-06, 

177 # nstart=None, 

178 # weight='weight', 

179 # dangling=None, 

180 # use_directed=True, 

181 # reverse=True): 

182 # """ 

183 # Measures node activity in a `networkx.Graph` using a thin wrapper around `networkx` implementation of pagerank algorithm (see `networkx.algorithms.link_analysis.pagerank`). Use with `lum.clu.processors.doc.DirectedGraph.graph`. 

184 # Note that by default, the directed graph is reversed in order to highlight predicate-argument nodes (refer to pagerank algorithm to understand why). 

185 

186 # See Also 

187 # -------- 

188 # `processors.paths.DependencyUtils.pagerank` 

189 # Method parameters correspond to those of [`networkx.algorithms.link_analysis.pagerank`](https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html#networkx.algorithms.link_analysis.pagerank_alg.pagerank) 

190 # """ 

191 # # check whether or not to reverse directed graph 

192 # dg = self.directed_graph if not reverse else DependencyUtils.build_networkx_graph(roots=self.roots, edges=self.edges, name=self.kind, reverse=True) 

193 # # determine graph to use 

194 # graph = dg if use_directed else self.undirected_graph 

195 # return DependencyUtils.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol, nstart=nstart, weight=weight, dangling=dangling) 

196 

197 # def _build_incoming(self, edges): 

198 # dep_dict = defaultdict(list) 

199 # for edge in edges: 

200 # dep_dict[edge.destination].append((edge.source, edge.relation)) 

201 # return dep_dict 

202 

203 # def _build_outgoing(self, edges): 

204 # dep_dict = defaultdict(list) 

205 # for edge in edges: 

206 # dep_dict[edge.source].append((edge.destination, edge.relation)) 

207 # return dep_dict 

208 

209 # def _build_labeled(self): 

210 # labeled = [] 

211 # for out in self.outgoing: 

212 # for (dest, rel) in self.outgoing[out]: 

213 # labeled.append("{}_{}_{}".format(self._words[out], rel.upper(), self._words[dest])) 

214 # return labeled 

215 

216 # def _build_unlabeled(self): 

217 # unlabeled = [] 

218 # for out in self.outgoing: 

219 # for (dest, _) in self.outgoing[out]: 

220 # unlabeled.append("{}_{}".format(self._words[out], self._words[dest])) 

221 # return unlabeled 

222 

223 # def _graph_to_JSON_dict(self): 

224 # dg_dict = dict() 

225 # dg_dict["edges"] = [e.to_JSON_dict() for e in self.edges] 

226 # dg_dict["roots"] = self.roots 

227 # return dg_dict 

228 

229 # def to_JSON_dict(self): 

230 # return {self.kind:self._graph_to_JSON_dict()}