@@ -38,38 +38,215 @@ Check also the
3838[interactive tutorial](https://colab.research.google.com/github/arangoml/dgl-adapter/blob/master/examples/ArangoDB_DGL_Adapter.ipynb).
3939
4040``` py
41- from arango import ArangoClient # Python-Arango driver
42- from dgl.data import KarateClubDataset # Sample graph from DGL
41+ import dgl
42+ import torch
43+ import pandas
4344
44- # Let's assume that the ArangoDB "fraud detection" dataset is imported to this endpoint
45- db = ArangoClient(hosts = " http://localhost:8529" ).db(" _system" , username = " root" , password = " " )
45+ from arango import ArangoClient
46+ from adbdgl_adapter import ADBDGL_Adapter , ADBDGL_Controller
47+ from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder
4648
49+ # Connect to ArangoDB
50+ db = ArangoClient().db()
51+
52+ # Instantiate the adapter
4753adbdgl_adapter = ADBDGL_Adapter(db)
4854
49- # Use Case 1.1: ArangoDB to DGL via Graph name
50- dgl_fraud_graph = adbdgl_adapter.arangodb_graph_to_dgl(" fraud-detection" )
55+ # Create a DGL Heterogeneous Graph
56+ fake_hetero = dgl.heterograph({
57+ (" user" , " follows" , " user" ): (torch.tensor([0 , 1 ]), torch.tensor([1 , 2 ])),
58+ (" user" , " follows" , " topic" ): (torch.tensor([1 , 1 ]), torch.tensor([1 , 2 ])),
59+ (" user" , " plays" , " game" ): (torch.tensor([0 , 3 ]), torch.tensor([3 , 4 ])),
60+ })
61+ fake_hetero.nodes[" user" ].data[" features" ] = torch.tensor([21 , 44 , 16 , 25 ])
62+ fake_hetero.nodes[" user" ].data[" label" ] = torch.tensor([1 , 2 , 0 , 1 ])
63+ fake_hetero.nodes[" game" ].data[" features" ] = torch.tensor([[0 , 0 ], [0 , 1 ], [1 , 0 ], [1 , 1 ], [1 , 1 ]])
64+ fake_hetero.edges[(" user" , " plays" , " game" )].data[" features" ] = torch.tensor([[6 , 1 ], [1000 , 0 ]])
65+ ```
66+
67+ ### DGL to ArangoDB
68+ ``` py
69+ # ###########################
70+ # 1.1: without a Metagraph #
71+ # ###########################
5172
52- # Use Case 1.2: ArangoDB to DGL via Collection names
53- dgl_fraud_graph_2 = adbdgl_adapter.arangodb_collections_to_dgl(
54- " fraud-detection" ,
55- {" account" , " Class" , " customer" }, # Vertex collections
56- {" accountHolder" , " Relationship" , " transaction" }, # Edge collections
57- )
73+ adb_g = adbdgl_adapter.dgl_to_arangodb(" FakeHetero" , fake_hetero)
5874
59- # Use Case 1.3: ArangoDB to DGL via Metagraph
75+ # ########################
76+ # 1.2: with a Metagraph #
77+ # ########################
78+
79+ # Specifying a Metagraph provides customized adapter behaviour
def label_tensor_to_2_column_dataframe(
    dgl_tensor: torch.Tensor, adb_df: pandas.DataFrame
) -> pandas.DataFrame:
    """A user-defined function to create two
    ArangoDB attributes out of the 'user' label tensor

    :param dgl_tensor: The DGL Tensor containing the data
    :type dgl_tensor: torch.Tensor
    :param adb_df: The ArangoDB DataFrame to populate, whose
        size is preset to the length of **dgl_tensor**.
    :type adb_df: pandas.DataFrame
    :return: The populated ArangoDB DataFrame
    :rtype: pandas.DataFrame
    """
    label_map = {0: "Class A", 1: "Class B", 2: "Class C"}

    # Keep the raw numeric label and add a human-readable companion column.
    adb_df["label_num"] = dgl_tensor.tolist()
    adb_df["label_str"] = adb_df["label_num"].map(label_map)

    return adb_df


# NOTE: the function above must be defined *before* the metagraph, because the
# dict literal looks the function name up at the moment the dict is built.
metagraph = {
    "nodeTypes": {
        "user": {
            # 1) you can specify a string value for attribute renaming
            "features": "user_age",
            # 2) you can specify a function for user-defined handling,
            #    as long as the function returns a Pandas DataFrame
            "label": label_tensor_to_2_column_dataframe,
        },
        # 3) you can specify a set of strings if you want to preserve the same
        #    DGL attribute names for the node/edge type
        "game": {"features"},  # this is equivalent to {"features": "features"}
    },
    "edgeTypes": {
        ("user", "plays", "game"): {
            # 4) you can specify a list of strings for tensor disassembly
            #    (if you know the number of node/edge features in advance)
            "features": ["hours_played", "is_satisfied_with_game"],
        },
    },
}
115+
116+
117+ adb_g = adbdgl_adapter.dgl_to_arangodb(" FakeHetero" , fake_hetero, metagraph, explicit_metagraph = False )
118+
119+ # ######################################################
120+ # 1.3: with a Metagraph and `explicit_metagraph=True` #
121+ # ######################################################
122+
123+ # With `explicit_metagraph=True`, the node & edge types omitted from the metagraph will NOT be converted to ArangoDB.
124+ adb_g = adbdgl_adapter.dgl_to_arangodb(" FakeHetero" , fake_hetero, metagraph, explicit_metagraph = True )
125+
126+ # #######################################
127+ # 1.4: with a custom ADBDGL Controller #
128+ # #######################################
129+
class Custom_ADBDGL_Controller(ADBDGL_Controller):
    """Example controller that tags every node and edge on its way to ArangoDB."""

    def _prepare_dgl_node(self, dgl_node: dict, node_type: str) -> dict:
        """Hook for adjusting a DGL node before it is inserted into its ArangoDB collection.

        :param dgl_node: The DGL node object, modified in place.
        :param node_type: The DGL Node Type of the node.
        :return: The same DGL node object, with the extra "foo" attribute set.
        """
        dgl_node.update(foo="bar")
        return dgl_node

    def _prepare_dgl_edge(self, dgl_edge: dict, edge_type: tuple) -> dict:
        """Hook for adjusting a DGL edge before it is inserted into its ArangoDB collection.

        :param dgl_edge: The DGL edge object, modified in place.
        :param edge_type: The Edge Type of the DGL edge. Formatted
            as (from_collection, edge_collection, to_collection)
        :return: The same DGL edge object, with the extra "bar" attribute set.
        """
        dgl_edge.update(bar="foo")
        return dgl_edge
151+
152+
153+ adb_g = ADBDGL_Adapter(db, Custom_ADBDGL_Controller()).dgl_to_arangodb(" FakeHetero" , fake_hetero)
154+ ```
155+
156+ ### ArangoDB to DGL
157+ ``` py
158+ # Start from scratch!
159+ db.delete_graph(" FakeHetero" , drop_collections = True , ignore_missing = True )
160+ adbdgl_adapter.dgl_to_arangodb(" FakeHetero" , fake_hetero)
161+
162+ # ######################
163+ # 2.1: via Graph name #
164+ # ######################
165+
166+ # Due to risk of ambiguity, this method does not transfer attributes
167+ dgl_g = adbdgl_adapter.arangodb_graph_to_dgl(" FakeHetero" )
168+
169+ # ############################
170+ # 2.2: via Collection names #
171+ # ############################
172+
173+ # Due to risk of ambiguity, this method does not transfer attributes
174+ dgl_g = adbdgl_adapter.arangodb_collections_to_dgl(" FakeHetero" , v_cols = {" user" , " game" }, e_cols = {" plays" })
175+
176+ # #####################
177+ # 2.3: via Metagraph #
178+ # #####################
179+
180+ # Transfers attributes "as is", meaning they are already formatted to DGL data standards.
181+ # Learn more about the DGL Data Standards here: https://docs.dgl.ai/guide/graph.html#guide-graph
182+ metagraph_v1 = {
61183 " vertexCollections" : {
62- " account" : {" Balance" , " account_type" , " customer_id" , " rank" },
63- " customer" : {" Name" , " rank" },
184+ # Move the "features" & "label" ArangoDB attributes to DGL as "features" & "label" Tensors
185+ " user" : {" features" , " label" }, # equivalent to {"features": "features", "label": "label"}
186+ " game" : {" dgl_game_features" : " features" },
187+ " topic" : {},
64188 },
65189 " edgeCollections" : {
66- " transaction " : {" transaction_amt " , " sender_bank_id " , " receiver_bank_id " },
67- " accountHolder " : {},
190+ " plays " : {" dgl_plays_features " : " features " },
191+ " follows " : {}
68192 },
69193}
70- dgl_fraud_graph_3 = adbdgl_adapter.arangodb_to_dgl(" fraud-detection" , metagraph)
71194
72- # Use Case 2: DGL to ArangoDB
73- dgl_karate_graph = KarateClubDataset()[0 ]
74- adb_karate_graph = adbdgl_adapter.dgl_to_arangodb(" Karate" , dgl_karate_graph)
75- ```
195+ dgl_g = adbdgl_adapter.arangodb_to_dgl(" FakeHetero" , metagraph_v1)
196+
197+ # ################################################
198+ # 2.4: via Metagraph with user-defined encoders #
199+ # ################################################
200+
201+ # Transforms attributes via user-defined encoders
202+ metagraph_v2 = {
203+ " vertexCollections" : {
204+ " Movies" : {
205+ " features" : { # Build a feature matrix from the "Action" & "Drama" document attributes
206+ " Action" : IdentityEncoder(dtype = torch.long),
207+ " Drama" : IdentityEncoder(dtype = torch.long),
208+ },
209+ " label" : " Comedy" ,
210+ },
211+ " Users" : {
212+ " features" : {
213+ " Gender" : CategoricalEncoder(), # CategoricalEncoder(mapping={"M": 0, "F": 1}),
214+ " Age" : IdentityEncoder(dtype = torch.long),
215+ }
216+ },
217+ },
218+ " edgeCollections" : {" Ratings" : {" weight" : " Rating" }},
219+ }
220+
221+ dgl_g = adbdgl_adapter.arangodb_to_dgl(" imdb" , metagraph_v2)
222+
223+ # #################################################
224+ # 2.5: via Metagraph with user-defined functions #
225+ # #################################################
226+
227+ # Transforms attributes via user-defined functions
def udf_user_features(user_df: pandas.DataFrame) -> torch.Tensor:
    """Build the 'user' feature tensor from the "features" column of **user_df**.

    :param user_df: The DataFrame to read the "features" column from.
    :type user_df: pandas.DataFrame
    :return: A tensor built from the "features" column.
    :rtype: torch.Tensor
    """
    # user_df["features"] = ...
    return torch.tensor(user_df["features"].to_list())


def udf_game_features(game_df: pandas.DataFrame) -> torch.Tensor:
    """Build the 'game' feature tensor from the "features" column of **game_df**.

    :param game_df: The DataFrame to read the "features" column from.
    :type game_df: pandas.DataFrame
    :return: A tensor built from the "features" column.
    :rtype: torch.Tensor
    """
    # game_df["features"] = ...
    return torch.tensor(game_df["features"].to_list())


# NOTE: the named functions above must be defined *before* the metagraph,
# because the dict literal looks their names up at the moment it is built.
metagraph_v3 = {
    "vertexCollections": {
        "user": {
            "features": udf_user_features,  # supports named functions
            "label": lambda df: torch.tensor(df["label"].to_list()),  # also supports lambda functions
        },
        "game": {"features": udf_game_features},
    },
    "edgeCollections": {
        "plays": {"features": (lambda df: torch.tensor(df["features"].to_list()))},
    },
}
249+
250+
251+ dgl_g = adbdgl_adapter.arangodb_to_dgl(" FakeHetero" , metagraph_v3)
252+ ```
0 commit comments