## ----setup, include=FALSE----------------------------------------------------- # Vignette code is executed locally (NOT_CRAN=true) but not on CRAN, where # the CPU fallback would multi-thread and trip the "CPU time > elapsed" NOTE. knitr::opts_chunk$set(eval = identical(Sys.getenv("NOT_CRAN"), "true")) ## ----------------------------------------------------------------------------- # # install.packages("ggmlR") # once on CRAN # library(ggmlR) ## ----------------------------------------------------------------------------- # data(iris) # set.seed(42) # # x <- as.matrix(iris[, 1:4]) # x <- scale(x) # standardise # # # one-hot encode species # y <- model.matrix(~ Species - 1, iris) # [150 x 3] # # idx <- sample(nrow(x)) # n_tr <- 120L # x_tr <- x[idx[1:n_tr], ] # y_tr <- y[idx[1:n_tr], ] # x_val <- x[idx[(n_tr+1):150], ] # y_val <- y[idx[(n_tr+1):150], ] ## ----------------------------------------------------------------------------- # model <- ggml_model_sequential() |> # ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |> # ggml_layer_batch_norm() |> # ggml_layer_dropout(0.3, stochastic = TRUE) |> # ggml_layer_dense(16L, activation = "relu") |> # ggml_layer_dense(3L, activation = "softmax") |> # ggml_compile( # optimizer = "adam", # loss = "categorical_crossentropy", # metrics = c("accuracy") # ) # # print(model) ## ----------------------------------------------------------------------------- # model <- ggml_fit( # model, # x_tr, y_tr, # epochs = 100L, # batch_size = 32L, # validation_split = 0.0, # we supply our own val set below # verbose = 0L # ) ## ----------------------------------------------------------------------------- # score <- ggml_evaluate(model, x_val, y_val, batch_size = 16L) # cat(sprintf("Val loss: %.4f Val accuracy: %.4f\n", score$loss, score$accuracy)) # # probs <- ggml_predict(model, x_val, batch_size = 16L) # classes <- apply(probs, 1, which.max) # true <- apply(y_val, 1, which.max) # cat("Confusion matrix:\n") # print(table(true = true, predicted = classes)) ## ----------------------------------------------------------------------------- # path <- tempfile(fileext = ".rds") # ggml_save_model(model, path) # cat(sprintf("Saved: %s (%.1f KB)\n", path, file.size(path) / 1024)) # # model2 <- ggml_load_model(path) # score2 <- ggml_evaluate(model2, x_val, y_val, batch_size = 16L) # cat(sprintf("Reloaded model accuracy: %.4f\n", score2$accuracy)) ## ----------------------------------------------------------------------------- # inp <- ggml_input(shape = 4L) # # h <- inp |> ggml_layer_dense(32L, activation = "relu") |> ggml_layer_batch_norm() # h <- h |> ggml_layer_dense(16L, activation = "relu") # out <- h |> ggml_layer_dense(3L, activation = "softmax") # # model_fn <- ggml_model(inputs = inp, outputs = out) |> # ggml_compile(optimizer = "adam", loss = "categorical_crossentropy", # metrics = c("accuracy")) # # model_fn <- ggml_fit(model_fn, x_tr, y_tr, # epochs = 100L, batch_size = 32L, verbose = 0L) # # score_fn <- ggml_evaluate(model_fn, x_val, y_val, batch_size = 16L) # cat(sprintf("Functional model accuracy: %.4f\n", score_fn$accuracy)) ## ----------------------------------------------------------------------------- # data(mtcars) # set.seed(7) # # # Input group 1: engine (disp, hp, wt) # # Input group 2: transmission / gearbox (cyl, gear, carb, am) # engine <- as.matrix(scale(mtcars[, c("disp","hp","wt")])) # trans <- as.matrix(scale(mtcars[, c("cyl","gear","carb","am")])) # y_mpg <- matrix(scale(mtcars$mpg), ncol = 1L) # [32 x 1] # # # small dataset — use all for training, evaluate on same data for demo # x1 <- engine; x2 <- trans # # inp1 <- ggml_input(shape = 3L, name = "engine") # inp2 <- ggml_input(shape = 4L, name = "transmission") # # branch1 <- inp1 |> ggml_layer_dense(16L, activation = "relu") # branch2 <- inp2 |> ggml_layer_dense(16L, activation = "relu") # # merged <- ggml_layer_add(list(branch1, branch2)) # element-wise add # out_reg <- merged |> # ggml_layer_dense(8L, activation = "relu") |> # ggml_layer_dense(1L) # # model_reg <- ggml_model(inputs = list(inp1, inp2), outputs = out_reg) |> # ggml_compile(optimizer = "adam", loss = "mse") # # model_reg <- ggml_fit(model_reg, # x = list(x1, x2), y = y_mpg, # epochs = 200L, batch_size = 16L, verbose = 0L) # # preds <- ggml_predict(model_reg, x = list(x1, x2), batch_size = 16L) # cat(sprintf("Pearson r (scaled mpg): %.4f\n", cor(preds, y_mpg))) ## ----------------------------------------------------------------------------- # cb_stop <- ggml_callback_early_stopping( # monitor = "val_loss", # patience = 15L, # min_delta = 1e-4 # ) # # model_cb <- ggml_model_sequential() |> # ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |> # ggml_layer_dense(3L, activation = "softmax") |> # ggml_compile(optimizer = "adam", loss = "categorical_crossentropy") # # model_cb <- ggml_fit(model_cb, x_tr, y_tr, # epochs = 300L, # batch_size = 32L, # validation_split = 0.1, # callbacks = list(cb_stop), # verbose = 0L) ## ----------------------------------------------------------------------------- # # Cosine annealing # cb_cosine <- ggml_schedule_cosine_decay(T_max = 100L, eta_min = 1e-5) # # # Step decay: halve LR every 30 epochs # cb_step <- ggml_schedule_step_decay(step_size = 30L, gamma = 0.5) # # # Reduce on plateau # cb_plateau <- ggml_schedule_reduce_on_plateau( # monitor = "val_loss", # factor = 0.5, # patience = 10L, # min_lr = 1e-6 # ) # # model_lr <- ggml_model_sequential() |> # ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |> # ggml_layer_dense(3L, activation = "softmax") |> # ggml_compile(optimizer = "adam", loss = "categorical_crossentropy") # # model_lr <- ggml_fit(model_lr, x_tr, y_tr, # epochs = 150L, # batch_size = 32L, # validation_split = 0.1, # callbacks = list(cb_cosine), # verbose = 0L) ## ----------------------------------------------------------------------------- # # transpose: rows = features, cols = samples # x_tr_ag <- t(x_tr) # [4, 120] # y_tr_ag <- t(y_tr) # [3, 120] # x_val_ag <- t(x_val) # [4, 30] # y_val_ag <- t(y_val) ## ----------------------------------------------------------------------------- # ag_mod <- ag_sequential( # ag_linear(4L, 32L, activation = "relu"), # ag_batch_norm(32L), # ag_dropout(0.3), # ag_linear(32L, 16L, activation = "relu"), # ag_linear(16L, 3L) # ) # # params <- ag_mod$parameters() # opt <- optimizer_adam(params, lr = 1e-3) ## ----------------------------------------------------------------------------- # BS <- 32L # n <- ncol(x_tr_ag) # # ag_train(ag_mod) # set.seed(42) # # for (ep in seq_len(150L)) { # perm <- sample(n) # for (b in seq_len(ceiling(n / BS))) { # idx <- perm[seq((b-1L)*BS + 1L, min(b*BS, n))] # xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE]) # yb <- y_tr_ag[, idx, drop = FALSE] # # with_grad_tape({ # loss <- ag_softmax_cross_entropy_loss(ag_mod$forward(xb), yb) # }) # grads <- backward(loss) # opt$step(grads) # opt$zero_grad() # } # # if (ep %% 50L == 0L) # cat(sprintf("epoch %d loss %.4f\n", ep, loss$data[1])) # } ## ----------------------------------------------------------------------------- # ag_eval(ag_mod) # # # forward in chunks, apply softmax manually # ag_predict_cm <- function(model, x_cm, chunk = 64L) { # n <- ncol(x_cm) # out <- matrix(0.0, nrow(model$forward(ag_tensor(x_cm[,1,drop=FALSE]))$data), n) # for (s in seq(1L, n, by = chunk)) { # e <- min(s + chunk - 1L, n) # lg <- model$forward(ag_tensor(x_cm[, s:e, drop = FALSE]))$data # ev <- exp(lg - apply(lg, 2, max)) # out[, s:e] <- ev / colSums(ev) # } # out # } # # probs_ag <- ag_predict_cm(ag_mod, x_val_ag) # [3, 30] # pred_ag <- apply(probs_ag, 2, which.max) # true_ag <- apply(y_val_ag, 1, which.max) # col-major: rows = classes # acc_ag <- mean(pred_ag == true_ag) # cat(sprintf("Autograd val accuracy: %.4f\n", acc_ag)) ## ----------------------------------------------------------------------------- # ag_mod2 <- ag_sequential( # ag_linear(4L, 64L, activation = "relu"), # ag_batch_norm(64L), # ag_dropout(0.3), # ag_linear(64L, 32L, activation = "relu"), # ag_linear(32L, 3L) # ) # params2 <- ag_mod2$parameters() # opt2 <- optimizer_adam(params2, lr = 1e-3) # sch2 <- lr_scheduler_cosine(opt2, T_max = 150L, lr_min = 1e-5) # # ag_train(ag_mod2) # set.seed(42) # # for (ep in seq_len(150L)) { # perm <- sample(n) # for (b in seq_len(ceiling(n / BS))) { # idx <- perm[seq((b-1L)*BS + 1L, min(b*BS, n))] # xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE]) # yb <- y_tr_ag[, idx, drop = FALSE] # # with_grad_tape({ # loss2 <- ag_softmax_cross_entropy_loss(ag_mod2$forward(xb), yb) # }) # grads2 <- backward(loss2) # clip_grad_norm(params2, grads2, max_norm = 5.0) # opt2$step(grads2) # opt2$zero_grad() # } # sch2$step() # } # # ag_eval(ag_mod2) # probs2 <- ag_predict_cm(ag_mod2, x_val_ag) # acc2 <- mean(apply(probs2, 2, which.max) == true_ag) # cat(sprintf("ag + cosine + clip val accuracy: %.4f\n", acc2)) ## ----------------------------------------------------------------------------- # make_net <- function(n_in, n_hidden, n_out) { # W1 <- ag_param(matrix(rnorm(n_hidden * n_in) * sqrt(2/n_in), n_hidden, n_in)) # b1 <- ag_param(matrix(0.0, n_hidden, 1L)) # W2 <- ag_param(matrix(rnorm(n_out * n_hidden) * sqrt(2/n_hidden), n_out, n_hidden)) # b2 <- ag_param(matrix(0.0, n_out, 1L)) # # list( # forward = function(x) # ag_add(ag_matmul(W2, ag_relu(ag_add(ag_matmul(W1, x), b1))), b2), # parameters = function() list(W1=W1, b1=b1, W2=W2, b2=b2) # ) # } # # set.seed(1) # net <- make_net(4L, 32L, 3L) # opt_r <- optimizer_adam(net$parameters(), lr = 1e-3) # # for (ep in seq_len(200L)) { # perm <- sample(n) # for (b in seq_len(ceiling(n / BS))) { # idx <- perm[seq((b-1L)*BS+1L, min(b*BS, n))] # xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE]) # yb <- y_tr_ag[, idx, drop = FALSE] # with_grad_tape({ loss_r <- ag_softmax_cross_entropy_loss(net$forward(xb), yb) }) # gr <- backward(loss_r) # opt_r$step(gr); opt_r$zero_grad() # } # } # # probs_r <- ag_predict_cm(net, x_val_ag) # acc_r <- mean(apply(probs_r, 2, which.max) == true_ag) # cat(sprintf("Raw ag_param val accuracy: %.4f\n", acc_r)) ## ----------------------------------------------------------------------------- # # Use Vulkan GPU if available, fall back to CPU # device <- tryCatch({ # ag_device("gpu") # "gpu" # }, error = function(e) "cpu") # # cat("Running on:", device, "\n") # # # Mixed precision (f16 on GPU, f32 on CPU) # ag_dtype(if (device == "gpu") "f16" else "f32")