class AmazonPhotoDataset(EasyGraphBuiltinDataset):
    r"""Amazon Electronics Photo co-purchase graph dataset.

    Nodes represent products, and edges link products frequently co-purchased.
    Node features are bag-of-words of product reviews. The task is to classify
    the product category.

    Statistics:

    - Nodes: 7,650
    - Edges: 119,081
    - Number of Classes: 8
    - Features: 745

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to download/contains the input data directory. Default: None
    force_reload : bool, optional
        Whether to reload the dataset. Default: False
    verbose : bool, optional
        Whether to print out progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~easygraph.Graph` object and returns
        a transformed version. The :class:`~easygraph.Graph` object will be
        transformed before every access.

    Examples
    --------
    >>> from easygraph.datasets import AmazonPhotoDataset
    >>> dataset = AmazonPhotoDataset()
    >>> g = dataset[0]
    >>> print(g.number_of_nodes())
    >>> print(g.number_of_edges())
    >>> print(g.nodes[0]['feat'].shape)
    >>> print(g.nodes[0]['label'])
    >>> print(dataset.num_classes)
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        # Initialize storage BEFORE super().__init__, which triggers
        # download/process: if loading fails midway, __getitem__ can then
        # raise the intended ValueError instead of an AttributeError.
        self._g = None
        self._num_classes = None
        super(AmazonPhotoDataset, self).__init__(
            name="amazon_photo",
            url="https://data.dgl.ai/dataset/amazon_co_buy_photo.zip",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Parse the extracted ``.npz`` file into an EasyGraph graph.

        Reconstructs the CSR adjacency and attribute matrices stored in the
        archive, then attaches per-node ``feat``/``label`` attributes.
        """
        path = os.path.join(self.raw_path, "amazon_co_buy_photo.npz")
        # NpzFile keeps the underlying file handle open; use the context
        # manager so it is closed deterministically.
        with np.load(path) as data:
            adj = sp.csr_matrix(
                (data["adj_data"], data["adj_indices"], data["adj_indptr"]),
                shape=data["adj_shape"],
            )
            features = sp.csr_matrix(
                (data["attr_data"], data["attr_indices"], data["attr_indptr"]),
                shape=data["attr_shape"],
            ).todense()
            labels = data["labels"]

        g = eg.Graph()
        g.add_edges_from(list(zip(*adj.nonzero())))

        # `features` is a dense matrix; squeeze each row to a 1-D vector.
        for i in range(features.shape[0]):
            g.add_node(i, feat=np.array(features[i]).squeeze(), label=int(labels[i]))

        self._g = g
        self._num_classes = len(np.unique(labels))

        if self.verbose:
            print("Finished loading AmazonPhoto dataset.")
            print(f" NumNodes: {g.number_of_nodes()}")
            print(f" NumEdges: {g.number_of_edges()}")
            print(f" NumFeats: {features.shape[1]}")
            print(f" NumClasses: {self._num_classes}")

    def __getitem__(self, idx):
        """Return the single graph, applying the optional transform."""
        assert idx == 0, "AmazonPhotoDataset only contains one graph"
        if self._g is None:
            raise ValueError("Graph has not been loaded or processed correctly.")
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        """The dataset holds exactly one graph."""
        return 1

    @property
    def num_classes(self):
        """int: number of distinct node labels."""
        return self._num_classes
class ArxivHEPTHDataset(EasyGraphBuiltinDataset):
    r"""Citation network of arXiv High Energy Physics - Theory preprints.

    A directed edge u -> v means paper u cites paper v. The graph carries no
    node features or labels.

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Optional transform to apply on the graph.

    Examples
    --------
    >>> from easygraph.datasets import ArxivHEPTHDataset
    >>> dataset = ArxivHEPTHDataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        super(ArxivHEPTHDataset, self).__init__(
            name="cit-HepTh",
            url="https://snap.stanford.edu/data/cit-HepTh.txt.gz",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Fetch the gzip-compressed edge list and decompress it into raw_path."""
        gz_file = os.path.join(self.raw_dir, self.name + ".txt.gz")
        txt_file = os.path.join(self.raw_path, self.name + ".txt")

        download(self.url, path=gz_file)
        os.makedirs(self.raw_path, exist_ok=True)

        with gzip.open(gz_file, "rb") as src:
            with open(txt_file, "wb") as dst:
                shutil.copyfileobj(src, dst)

    def process(self):
        """Build the directed citation graph from the decompressed edge list."""
        g = eg.DiGraph()  # citations are directional
        txt_file = os.path.join(self.raw_path, self.name + ".txt")

        with open(txt_file, "r") as fh:
            for raw in fh:
                body = raw.strip()
                # Header/comment lines begin with '#'; blank lines carry no edge.
                if raw.startswith("#") or body == "":
                    continue
                src_id, dst_id = (int(tok) for tok in body.split())
                g.add_edge(src_id, dst_id)

        self._g = g
        self._num_nodes = g.number_of_nodes()
        self._num_edges = g.number_of_edges()

        if self.verbose:
            print("Finished loading Arxiv HEP-TH dataset.")
            print(f" NumNodes: {self._num_nodes}")
            print(f" NumEdges: {self._num_edges}")

    def __getitem__(self, idx):
        """Return the single citation graph (optionally transformed)."""
        assert idx == 0, "ArxivHEPTHDataset only contains one graph"
        if self._transform is None:
            return self._g
        return self._transform(self._g)

    def __len__(self):
        """Exactly one graph in this dataset."""
        return 1
class CoauthorCSDataset(EasyGraphBuiltinDataset):
    r"""CoauthorCS citation network dataset.

    Nodes are authors, and edges indicate co-authorship relationships. Each node
    has a bag-of-words feature vector and a label denoting the primary research field.

    Statistics:

    - Nodes: 18,333
    - Edges: 81,894
    - Feature Dim: 6,805
    - Classes: 15

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Transform to apply to the graph on access.

    Examples
    --------
    >>> from easygraph.datasets import CoauthorCSDataset
    >>> dataset = CoauthorCSDataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    >>> print("Feature shape:", g.nodes[0]['feat'].shape)
    >>> print("Label:", g.nodes[0]['label'])
    >>> print("Number of classes:", dataset.num_classes)
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        # Initialize storage BEFORE super().__init__, which triggers
        # download/process: if loading fails midway, __getitem__ can then
        # raise the intended ValueError instead of an AttributeError.
        self._g = None
        self._num_classes = None
        super(CoauthorCSDataset, self).__init__(
            name="coauthor_cs",
            url="https://data.dgl.ai/dataset/coauthor_cs.zip",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Parse the extracted ``.npz`` file into an EasyGraph graph."""
        path = os.path.join(self.raw_path, "coauthor_cs.npz")
        # NpzFile keeps the underlying file handle open; use the context
        # manager so it is closed deterministically.
        with np.load(path) as data:
            # Reconstruct adjacency matrix
            adj = sp.csr_matrix(
                (data["adj_data"], data["adj_indices"], data["adj_indptr"]),
                shape=data["adj_shape"],
            )
            # Reconstruct feature matrix
            features = sp.csr_matrix(
                (data["attr_data"], data["attr_indices"], data["attr_indptr"]),
                shape=data["attr_shape"],
            ).todense()
            labels = data["labels"]

        g = eg.Graph()
        g.add_edges_from(list(zip(*adj.nonzero())))

        for i in range(features.shape[0]):
            g.add_node(i, feat=np.array(features[i]).squeeze(), label=int(labels[i]))

        self._g = g
        self._num_classes = len(np.unique(labels))

        if self.verbose:
            print("Finished loading CoauthorCS dataset.")
            print(f" NumNodes: {g.number_of_nodes()}")
            print(f" NumEdges: {g.number_of_edges()}")
            print(f" NumFeats: {features.shape[1]}")
            print(f" NumClasses: {self._num_classes}")

    def __getitem__(self, idx):
        """Return the single graph, applying the optional transform."""
        assert idx == 0, "CoauthorCSDataset only contains one graph"
        if self._g is None:
            raise ValueError("Graph has not been loaded or processed correctly.")
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        """The dataset holds exactly one graph."""
        return 1

    @property
    def num_classes(self):
        """int: number of distinct node labels."""
        return self._num_classes
class FacebookEgoNetDataset(EasyGraphBuiltinDataset):
    r"""Facebook Ego-Net social network dataset.

    Each node is a user, and edges represent friendship. The dataset
    includes 10 ego networks centered on different users, merged here into
    a single undirected graph. No node features or labels are attached.

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Optional transform to apply on the graph.

    Examples
    --------
    >>> from easygraph.datasets import FacebookEgoNetDataset
    >>> dataset = FacebookEgoNetDataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        super(FacebookEgoNetDataset, self).__init__(
            name="facebook",
            url="https://snap.stanford.edu/data/facebook.tar.gz",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Merge every per-ego ``.edges`` file into one undirected graph."""
        parent_dir = os.path.join(self.raw_path, "facebook")
        g = eg.Graph()

        # sorted() makes node/edge insertion order deterministic across
        # filesystems (os.listdir order is unspecified).
        for filename in sorted(os.listdir(parent_dir)):
            if not filename.endswith(".edges"):
                continue
            edge_file = os.path.join(parent_dir, filename)
            with open(edge_file, "r") as f:
                for line in f:
                    parts = line.split()
                    # Skip blank/trailing lines instead of crashing on the
                    # two-value unpack.
                    if len(parts) != 2:
                        continue
                    g.add_edge(int(parts[0]), int(parts[1]))

        self._g = g
        self._num_nodes = g.number_of_nodes()
        self._num_edges = g.number_of_edges()

        if self.verbose:
            print("Finished loading Facebook Ego-Net dataset.")
            print(f" NumNodes: {self._num_nodes}")
            print(f" NumEdges: {self._num_edges}")

    def __getitem__(self, idx):
        """Return the single merged graph, applying the optional transform."""
        assert idx == 0, "FacebookEgoNetDataset only contains one merged graph"
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        """The dataset holds exactly one merged graph."""
        return 1

    def download(self):
        r"""Automatically download data and extract it."""
        if self.url is not None:
            archive_path = os.path.join(self.raw_dir, self.name + ".tar.gz")
            download(self.url, path=archive_path)
            extract_archive(archive_path, self.raw_path)
class FlickrDataset(EasyGraphBuiltinDataset):
    r"""Flickr dataset for node classification.

    Nodes are images and edges represent social tags co-occurrence.
    Node features are precomputed image embeddings. Labels indicate image categories.

    Statistics:
    - Nodes: 89,250
    - Edges: 899,756
    - Classes: 7
    - Feature dim: 500

    Source: GraphSAINT (https://arxiv.org/abs/1907.04931)

    Parameters
    ----------
    raw_dir : str, optional
        Custom directory to download the dataset. Default: None (uses standard cache dir).
    force_reload : bool, optional
        Whether to re-download and reprocess. Default: False.
    verbose : bool, optional
        Whether to print loading progress. Default: False.
    transform : callable, optional
        A transform applied to the graph on access.
    reorder : bool, optional
        Whether to apply graph reordering for locality (requires torch). Default: False.
        NOTE(review): currently stored but not acted upon in process() — confirm
        whether reordering is implemented elsewhere or still pending.

    Examples
    --------
    >>> from easygraph.datasets import FlickrDataset
    >>> ds = FlickrDataset(verbose=True)
    >>> g = ds[0]
    >>> print(g.number_of_nodes(), g.number_of_edges(), ds.num_classes)
    >>> print(g.nodes[0]['feat'].shape, g.nodes[0]['label'])
    """

    def __init__(
        self,
        raw_dir=None,
        force_reload=False,
        verbose=False,
        transform=None,
        reorder=False,
    ):
        name = "flickr"
        url = self._get_dgl_url("dataset/flickr.zip")
        self._reorder = reorder
        super(FlickrDataset, self).__init__(
            name=name,
            url=url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Load adjacency, features, labels and role-based splits from raw_path."""
        # Load adjacency
        coo = sp.load_npz(os.path.join(self.raw_path, "adj_full.npz"))
        g = eg.Graph()
        g.add_edges_from(list(zip(*coo.nonzero())))

        # Load features
        feats = np.load(os.path.join(self.raw_path, "feats.npy"))
        # Load labels (class_map maps stringified node id -> class index)
        with open(os.path.join(self.raw_path, "class_map.json")) as f:
            class_map = json.load(f)
        labels = np.array([class_map[str(i)] for i in range(feats.shape[0])])

        # Load train/val/test splits ("tr"/"va"/"te" are node-id lists)
        with open(os.path.join(self.raw_path, "role.json")) as f:
            role = json.load(f)
        train_mask = np.zeros(feats.shape[0], dtype=bool)
        train_mask[role["tr"]] = True
        val_mask = np.zeros(feats.shape[0], dtype=bool)
        val_mask[role["va"]] = True
        test_mask = np.zeros(feats.shape[0], dtype=bool)
        test_mask[role["te"]] = True

        # Attach node data
        for i in range(feats.shape[0]):
            g.add_node(i, feat=feats[i].astype(np.float32), label=int(labels[i]))
        g.graph["train_mask"] = train_mask
        g.graph["val_mask"] = val_mask
        g.graph["test_mask"] = test_mask

        self._g = g
        self._num_classes = int(labels.max() + 1)
        if self.verbose:
            print("Loaded Flickr dataset")
            print(
                f" Nodes: {g.number_of_nodes()}, Edges: {g.number_of_edges()}, Features: {feats.shape[1]}, Classes: {self._num_classes}"
            )

    def __getitem__(self, idx):
        """Return the single graph, applying the optional transform.

        The split masks are already stored in ``g.graph`` by process();
        the previous pop-and-reassign "transfer" here was a no-op and has
        been removed.
        """
        assert idx == 0, "FlickrDataset contains only one graph"
        g = self._g
        return self._transform(g) if self._transform else g

    def __len__(self):
        """The dataset holds exactly one graph."""
        return 1

    @property
    def num_classes(self):
        """int: number of image categories."""
        return self._num_classes

    @staticmethod
    def _get_dgl_url(path):
        # Deferred import: utils may pull optional heavy dependencies.
        from .utils import _get_dgl_url

        return _get_dgl_url(path)
class GitHubUsersDataset(EasyGraphBuiltinDataset):
    r"""GitHub developers social graph (musae_git).

    Nodes are GitHub developers; a directed edge A -> B means A follows B.
    Each node carries a profile/activity feature vector and a binary label
    (``ml_target``: machine-learning vs. web developer).

    Statistics:
    - Nodes: 37,700
    - Edges: 289,003
    - Classes: 2

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store raw data. Default: None
    force_reload : bool, optional
        Force re-download and processing. Default: False
    verbose : bool, optional
        Print processing information. Default: True
    transform : callable, optional
        Transform to apply to the graph on load.

    Examples
    --------
    >>> from easygraph.datasets import GitHubUsersDataset
    >>> dataset = GitHubUsersDataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    >>> print("Feature shape:", g.nodes[0]['feat'].shape)
    >>> print("Label:", g.nodes[0]['label'])
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        super(GitHubUsersDataset, self).__init__(
            name="musae_git",
            url="https://snap.stanford.edu/data/git_web_ml.zip",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        """Download the zip archive and extract it into raw_path."""
        archive = os.path.join(self.raw_dir, self.name + ".zip")
        download(self.url, path=archive)
        extract_archive(archive, self.raw_path)

    def process(self):
        """Build the directed follower graph with per-node features/labels."""
        g = eg.DiGraph()
        base_path = os.path.join(self.raw_path, "git_web_ml")

        # Load node features (JSON maps stringified node id -> feature list).
        with open(os.path.join(base_path, "musae_git_features.json"), "r") as f:
            features = json.load(f)

        # Load labels
        labels = {}
        with open(os.path.join(base_path, "musae_git_target.csv"), "r") as f:
            for row in csv.DictReader(f):
                labels[int(row["id"])] = int(row["ml_target"])

        # Load edges
        with open(os.path.join(base_path, "musae_git_edges.csv"), "r") as f:
            for row in csv.DictReader(f):
                g.add_edge(int(row["id_1"]), int(row["id_2"]))

        # Attach attributes by iterating the feature file, not g.nodes:
        # this keeps isolated users (features but no edges) instead of
        # dropping them, and cannot KeyError on edge-only ids.
        feat_dim = 0
        for node_str, vec in features.items():
            feat = np.array(vec, dtype=np.float32)
            feat_dim = max(feat_dim, feat.shape[0])
            g.add_node(int(node_str), feat=feat, label=labels.get(int(node_str), -1))

        self._g = g
        self._num_classes = len(set(labels.values()))

        if self.verbose:
            print("Finished loading GitHub Users dataset.")
            print(f" NumNodes: {g.number_of_nodes()}")
            print(f" NumEdges: {g.number_of_edges()}")
            # feat_dim is computed over all nodes (the previous code read the
            # loop variable after the loop, which fails on an empty file).
            print(f" Feature dim: {feat_dim}")
            print(f" NumClasses: {self._num_classes}")

    def __getitem__(self, idx):
        """Return the single graph, applying the optional transform."""
        assert idx == 0, "GitHubUsersDataset only contains one graph"
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        """The dataset holds exactly one graph."""
        return 1

    @property
    def num_classes(self):
        """int: number of distinct node labels."""
        return self._num_classes
class RedditDataset(EasyGraphBuiltinDataset):
    r"""Reddit posts graph (Sept 2014) for community (subreddit) classification.

    Statistics:
    - Nodes: ~232,965
    - Edges: ~114 million (approx.)
    - Features per node: 602
    - Classes: number of subreddit communities

    Data are split by post-day: first 20 days train, then validation (30%), test (rest).

    Parameters
    ----------
    self_loop : bool
        Add self-loop edges if True.
    raw_dir, force_reload, verbose, transform : same as EasyGraphBuiltinDataset
    """

    def __init__(
        self,
        self_loop=False,
        raw_dir=None,
        force_reload=False,
        verbose=True,
        transform=None,
    ):
        self.self_loop = self_loop
        # Set before super().__init__ (which triggers process) so a failed
        # load leaves a well-defined attribute.
        self._g = None
        super().__init__(
            name="reddit",
            url="https://data.dgl.ai/dataset/reddit.zip",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Load node data and adjacency from the two extracted ``.npz`` files."""
        # Expect two files extracted: reddit_data.npz & reddit_graph.npz
        data = np.load(os.path.join(self.raw_path, "reddit_data.npz"))
        feat = data["feature"]  # shape [N, 602]
        labels = data["label"]  # shape [N]
        split = data["node_types"]  # 1=train, 2=val, 3=test

        # Load adjacency
        adj = sp.load_npz(os.path.join(self.raw_path, "reddit_graph.npz"))
        src, dst = adj.nonzero()
        if self.self_loop:
            self_loops = np.arange(adj.shape[0])
            src = np.concatenate([src, self_loops])
            dst = np.concatenate([dst, self_loops])
        edges = list(zip(src, dst))

        # Build graph
        g = eg.Graph()
        g.add_edges_from(edges)

        # Assign node features, labels, and masks
        for i in range(feat.shape[0]):
            g.add_node(
                i,
                feat=feat[i],
                label=int(labels[i]),
                train_mask=(split[i] == 1),
                val_mask=(split[i] == 2),
                test_mask=(split[i] == 3),
            )

        self._g = g
        self._num_classes = int(np.max(labels) + 1)

        if self.verbose:
            print("Loaded Reddit dataset:")
            print(f" NumNodes: {g.number_of_nodes()}")
            print(f" NumEdges: {g.number_of_edges()}")
            print(f" NumFeats: {feat.shape[1]}")
            print(f" NumClasses: {self._num_classes}")

    def __getitem__(self, idx):
        """Return the single graph, applying the optional transform.

        Uses ``self._transform`` — the attribute set by the base class and
        used by every sibling dataset; the previous ``self.transform``
        raised AttributeError.
        """
        assert idx == 0, "RedditDataset only contains one graph"
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        """The dataset holds exactly one graph."""
        return 1

    @property
    def num_classes(self):
        """int: number of subreddit communities."""
        return self._num_classes
class RoadNetCADataset(EasyGraphBuiltinDataset):
    r"""Road network of California (RoadNet-CA).

    Nodes are road intersections and edges are the roads connecting them.
    The graph is undirected and carries no features or labels.

    Parameters
    ----------
    raw_dir : str, optional
        Directory to store the raw downloaded files. Default: None
    force_reload : bool, optional
        Whether to re-download and process the dataset. Default: False
    verbose : bool, optional
        Whether to print detailed processing logs. Default: True
    transform : callable, optional
        Optional transform to apply on the graph.

    Examples
    --------
    >>> from easygraph.datasets import RoadNetCADataset
    >>> dataset = RoadNetCADataset()
    >>> g = dataset[0]
    >>> print("Nodes:", g.number_of_nodes())
    >>> print("Edges:", g.number_of_edges())
    """

    def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None):
        super(RoadNetCADataset, self).__init__(
            name="roadNet-CA",
            url="https://snap.stanford.edu/data/roadNet-CA.txt.gz",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Download the ``.txt.gz`` edge list and gunzip it into raw_path."""
        gz_file = os.path.join(self.raw_dir, f"{self.name}.txt.gz")
        txt_file = os.path.join(self.raw_path, f"{self.name}.txt")

        download(self.url, path=gz_file)

        if not os.path.exists(self.raw_path):
            os.makedirs(self.raw_path)

        with gzip.open(gz_file, "rb") as src, open(txt_file, "wb") as dst:
            shutil.copyfileobj(src, dst)

    def process(self):
        """Parse the edge list into an undirected EasyGraph graph."""
        edge_list_path = os.path.join(self.raw_path, f"{self.name}.txt")
        g = eg.Graph()  # roads are bidirectional -> undirected graph

        with open(edge_list_path, "r") as fh:
            for raw in fh:
                content = raw.strip()
                # '#'-prefixed header lines and blank lines carry no edge.
                if raw.startswith("#") or content == "":
                    continue
                endpoints = content.split()
                g.add_edge(int(endpoints[0]), int(endpoints[1]))

        self._g = g
        self._num_nodes = g.number_of_nodes()
        self._num_edges = g.number_of_edges()

        if self.verbose:
            print("Finished loading RoadNet-CA dataset.")
            print(f" NumNodes: {self._num_nodes}")
            print(f" NumEdges: {self._num_edges}")

    def __getitem__(self, idx):
        """Return the single road network (optionally transformed)."""
        assert idx == 0, "RoadNetCADataset only contains one graph"
        return self._g if self._transform is None else self._transform(self._g)

    def __len__(self):
        """Exactly one graph in this dataset."""
        return 1
gzip +import os + +import easygraph as eg + +from easygraph.datasets.graph_dataset_base import EasyGraphBuiltinDataset +from easygraph.datasets.utils import download +from easygraph.datasets.utils import extract_archive + + +class TwitterEgoDataset(EasyGraphBuiltinDataset): + r""" + Twitter Ego Network Dataset + + The Twitter dataset was collected from public sources and contains a large ego-network of Twitter users. + The combined network includes 81K edges among 81K users. + + Source: J. McAuley and J. Leskovec, Stanford SNAP, 2012 + URL: https://snap.stanford.edu/data/egonets-Twitter.html + File used: https://snap.stanford.edu/data/twitter_combined.txt.gz + """ + + def __init__(self): + super(TwitterEgoDataset, self).__init__( + name="twitter_ego", + url="https://snap.stanford.edu/data/twitter_combined.txt.gz", + force_reload=False, + ) + + def download(self): + gz_path = os.path.join(self.raw_path, "twitter_combined.txt.gz") + download(self.url, path=gz_path) + extract_archive(gz_path, self.raw_path) + + def process(self): + import gzip + + import easygraph as eg + + gz_path = os.path.join(self.raw_path, "twitter_combined.txt.gz") + txt_path = os.path.join(self.raw_path, "twitter_combined.txt") + + if not os.path.exists(txt_path): + with gzip.open(gz_path, "rt") as f_in, open(txt_path, "w") as f_out: + f_out.writelines(f_in) + + G = eg.Graph() + edge_count = 0 + with open(txt_path, "r") as f: + for line in f: + u, v = map(int, line.strip().split()) + G.add_edge(u, v) + edge_count += 1 + + self._graphs = [G] + self._graph = G + self._processed = True + + def __getitem__(self, idx): + if self._graph is not None: + return self._graph + elif self._graphs: + return self._graphs[idx] + else: + return None diff --git a/easygraph/datasets/web_google.py b/easygraph/datasets/web_google.py new file mode 100644 index 00000000..97597299 --- /dev/null +++ b/easygraph/datasets/web_google.py @@ -0,0 +1,118 @@ +"""Web-Google Dataset + +This dataset is a web graph based on 
Google's web pages and their hyperlink +structure, as crawled by the Stanford WebBase project in 2002. + +Each node represents a web page, and a directed edge from u to v indicates +a hyperlink from page u to page v. + +Statistics: +- Nodes: 875713 +- Edges: 5105039 +- Features: None +- Labels: None + +Reference: +J. Leskovec, A. Rajaraman, J. Ullman, “Mining of Massive Datasets.” +Dataset from SNAP: https://snap.stanford.edu/data/web-Google.html +""" + +import gzip +import os +import shutil + +import easygraph as eg + +from easygraph.classes.graph import Graph + +from .graph_dataset_base import EasyGraphBuiltinDataset +from .utils import download +from .utils import extract_archive + + +class WebGoogleDataset(EasyGraphBuiltinDataset): + r"""Web-Google hyperlink network dataset. + + Parameters + ---------- + raw_dir : str, optional + Directory to store the raw downloaded files. Default: None + force_reload : bool, optional + Whether to re-download and process the dataset. Default: False + verbose : bool, optional + Whether to print detailed processing logs. Default: True + transform : callable, optional + Optional transform to apply on the graph. 
+ + Examples + -------- + >>> from easygraph.datasets import WebGoogleDataset + >>> dataset = WebGoogleDataset() + >>> g = dataset[0] + >>> print("Nodes:", g.number_of_nodes()) + >>> print("Edges:", g.number_of_edges()) + """ + + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): + name = "web-Google" + url = "https://snap.stanford.edu/data/web-Google.txt.gz" + super(WebGoogleDataset, self).__init__( + name=name, + url=url, + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) + + def download(self): + r"""Download and extract .gz edge list.""" + if self.url is not None: + file_path = os.path.join(self.raw_dir, self.name + ".txt.gz") + download(self.url, path=file_path) + extract_archive(file_path, self.raw_path) + + def process(self): + graph = eg.DiGraph() # Web-Google is directed + edge_list_path = os.path.join(self.raw_path, self.name + ".txt") + + with open(edge_list_path, "r") as f: + for line in f: + if line.startswith("#") or line.strip() == "": + continue + u, v = map(int, line.strip().split()) + graph.add_edge(u, v) + + self._g = graph + self._num_nodes = graph.number_of_nodes() + self._num_edges = graph.number_of_edges() + + if self.verbose: + print("Finished loading Web-Google dataset.") + print(f" NumNodes: {self._num_nodes}") + print(f" NumEdges: {self._num_edges}") + + def __getitem__(self, idx): + assert idx == 0, "WebGoogleDataset only contains one graph" + return self._g if self._transform is None else self._transform(self._g) + + def __len__(self): + return 1 + + def download(self): + r"""Download and decompress the .txt.gz file.""" + if self.url is not None: + compressed_path = os.path.join(self.raw_dir, self.name + ".txt.gz") + extracted_path = os.path.join(self.raw_path, self.name + ".txt") + + # Download .gz file + download(self.url, path=compressed_path) + + # Ensure output directory exists + if not os.path.exists(self.raw_path): + os.makedirs(self.raw_path) + + # 
Decompress manually + with gzip.open(compressed_path, "rb") as f_in: + with open(extracted_path, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) diff --git a/easygraph/datasets/wiki_topcats.py b/easygraph/datasets/wiki_topcats.py new file mode 100644 index 00000000..9c337d5f --- /dev/null +++ b/easygraph/datasets/wiki_topcats.py @@ -0,0 +1,105 @@ +"""Wikipedia Top Categories Dataset (wiki-topcats) + +This dataset is a directed graph of Wikipedia articles restricted to +top-level categories (at least 100 articles), capturing the largest +strongly connected component. + +Statistics: +- Nodes: 1,791,489 +- Edges: 28,511,807 +- Categories: 17,364 +- Overlapping labels per node + +Source: +H. Yin, A. Benson, J. Leskovec, D. Gleich. +"Local Higher-order Graph Clustering", KDD 2017 +Data: https://snap.stanford.edu/data/wiki-topcats.html +""" + +import gzip +import os + +import easygraph as eg + +from easygraph.datasets.graph_dataset_base import EasyGraphBuiltinDataset +from easygraph.datasets.utils import download +from easygraph.datasets.utils import extract_archive + + +class WikiTopCatsDataset(EasyGraphBuiltinDataset): + """Wikipedia Top Categories Snapshot from 2011 (SNAP)""" + + def __init__(self, raw_dir=None, force_reload=False, verbose=True, transform=None): + super(WikiTopCatsDataset, self).__init__( + name="wiki_topcats", + url="https://snap.stanford.edu/data/wiki-topcats.txt.gz", + raw_dir=raw_dir, + force_reload=force_reload, + verbose=verbose, + transform=transform, + ) + + def download(self): + # Download the main graph file + gz_path = os.path.join(self.raw_dir, "wiki-topcats.txt.gz") + download(self.url, path=gz_path) + + # Also download category info and page names + cat_url = "https://snap.stanford.edu/data/wiki-topcats-categories.txt.gz" + names_url = "https://snap.stanford.edu/data/wiki-topcats-page-names.txt.gz" + download( + cat_url, path=os.path.join(self.raw_dir, "wiki-topcats-categories.txt.gz") + ) + download( + names_url, 
path=os.path.join(self.raw_dir, "wiki-topcats-page-names.txt.gz") + ) + + def process(self): + raw = self.raw_dir + + # Decompress and read edges + edge_gz = os.path.join(raw, "wiki-topcats.txt.gz") + edge_txt = os.path.join(raw, "wiki-topcats.txt") + if not os.path.exists(edge_txt): + with gzip.open(edge_gz, "rt") as fin, open(edge_txt, "w") as fout: + fout.writelines(fin) + G = eg.DiGraph() + edge_count = 0 + with open(edge_txt, "r") as f: + for line in f: + u, v = map(int, line.strip().split()) + G.add_edge(u, v) + edge_count += 1 + if self.verbose: + print(f"Loaded graph: {G.number_of_nodes()} nodes, {edge_count} edges") + + # Compress node names + names_gz = os.path.join(raw, "wiki-topcats-page-names.txt.gz") + names = {} + with gzip.open(names_gz, "rt") as f: + for idx, line in enumerate(f): + names[idx] = line.strip() + + # Load categories + cats_gz = os.path.join(raw, "wiki-topcats-categories.txt.gz") + labels = {} # mapping: node -> list of category strings + with gzip.open(cats_gz, "rt") as f: + for idx, line in enumerate(f): + categories = line.strip().split(";") + categories = [cat.strip() for cat in categories if cat.strip()] + labels[idx] = categories + + # Attach node features: empty, and node labels + for n in G.nodes: + G.add_node(n, name=names.get(n, ""), label=labels.get(n, [])) + + self._graph = G + self._graphs = [G] + self._processed = True + + def __getitem__(self, idx): + assert idx == 0 + return self._graph + + def __len__(self): + return 1 diff --git a/easygraph/model/hypergraphs/hwnn.py b/easygraph/model/hypergraphs/hwnn.py index 37684c39..980bd39e 100644 --- a/easygraph/model/hypergraphs/hwnn.py +++ b/easygraph/model/hypergraphs/hwnn.py @@ -39,7 +39,7 @@ def __init__( def forward(self, X: torch.Tensor, hgs: list) -> torch.Tensor: r"""The forward function. - + Parameters: ``X`` (``torch.Tensor``): Input vertex feature matrix. Size :math:`(N, C_{in})`. ``hg`` (``eg.Hypergraph``): The hypergraph structure that contains :math:`N` vertices. 
diff --git a/easygraph/nn/convs/hypergraphs/hwnn_conv.py b/easygraph/nn/convs/hypergraphs/hwnn_conv.py index ea7ea563..7c1fa7e8 100644 --- a/easygraph/nn/convs/hypergraphs/hwnn_conv.py +++ b/easygraph/nn/convs/hypergraphs/hwnn_conv.py @@ -44,7 +44,7 @@ def init_parameters(self): def forward(self, X: torch.Tensor, hg: Hypergraph) -> torch.Tensor: r"""The forward function. - + Parameters: X (``torch.Tensor``): Input vertex feature matrix. Size :math:`(N, C_{in})`. hg (``eg.Hypergraph``): The hypergraph structure that contains :math:`N` vertices.