Created
January 8, 2026 08:18
-
-
Save abikoushi/d986d60d4e1fdf13f02abb81fb405bbf to your computer and use it in GitHub Desktop.
冗長性のあるワンホットエンコーディングによるパラメータ化の係数をフルランクの計画行列の係数に変換する(切片項なし)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(moltenNMF) | |
| library(Matrix) | |
| df = expand.grid(letters[1:3], LETTERS[1:2]) | |
| X = sparse_onehot(~., data = df) | |
| A = matrix(c(1,0,0,0, | |
| 0,1,0,0, | |
| 0,0,1,0, | |
| 0,0,0,0, | |
| 0,0,0,1), byrow = TRUE, nrow = 5) | |
| print(A) | |
| Xs = sparse.model.matrix(~.-1, data = df) | |
| print(all(X %*% A == Xs)) | |
| B = matrix(c(1,0,0,1,0, | |
| 0,1,0,1,0, | |
| 0,0,1,1,0, | |
| 0,0,0,-1,1), byrow = TRUE, nrow = 4) | |
| print(B) | |
| print(all(Xs %*% B == X)) | |
| clen = c(4, 2, 2) | |
| full_to_onehot <- function(clen){ | |
| n_full = sum(clen - 1) + 1 | |
| n_onehot = sum(clen) | |
| B = matrix(0, n_full, n_onehot) | |
| B[1:clen[1],1:clen[1]] <- diag(clen[1]) | |
| cumlen <- cumsum(clen) | |
| cumlen_shift <- cumsum(c(clen[1],clen[-1]-1) ) | |
| for(k in seq_len(length(cumlen)-1)){ | |
| B[1:cumlen_shift[1],cumlen[k]+1] <- 1 | |
| for(i in (cumlen[k]+1):cumlen[k+1]){ | |
| B[cumlen_shift[k]+1,i] <- 1 | |
| } | |
| B[cumlen_shift[k]+1,cumlen[k]+1] <- -1 | |
| } | |
| return(B) | |
| } | |
| df = expand.grid(x1=letters[1:4], x2=LETTERS[1:2], x3=factor(1:2)) | |
| Xs = sparse.model.matrix(~x1+x2+x3-1, data = df) | |
| A = full_to_onehot(c(4, 2, 2)) | |
| ### | |
| set_attr_modelmat <- function(X) { | |
| if (!is.null(attr(X, "assign"))) { | |
| attr(X, "indices") <- c(0L, cumsum(rle(attr(X, "assign"))$lengths)) | |
| } | |
| labs <- names(attr(X, "contrasts")) | |
| if (!is.null(labs)) { | |
| attr(X, "term.labels") <- labs | |
| if (!is.null(X@Dimnames[[2]])) { | |
| attr(X, "value.labels") <- sub( | |
| paste(labs, collapse = "|"), | |
| "", | |
| X@Dimnames[[2]] | |
| ) | |
| } | |
| } | |
| return(X) | |
| } | |
| df = as.data.frame(Titanic) | |
| X_s = sparse_onehot(Freq ~ Class + Sex + Age + Survived, data = df) #冗長なワンホット | |
| L = 2 | |
| system.time({ | |
| out_s <- mNMF_vb.default(df$Freq, X = X_s, L = L, iter = 1000) | |
| }) | |
| X = sparse.model.matrix(Freq ~ Class + Sex + Age + Survived-1, data = df) #フルランク | |
| X = set_attr_modelmat(X) | |
| system.time({ | |
| out <- mNMF_vb.default(df$Freq, X = X, L = L, iter = 1000) | |
| }) | |
| plot(out_s$ELBO[-1], type = "l", lty = 2) | |
| lines(out$ELBO[-1], type = "l") | |
| V = (out$shape / out$rate) | |
| V_s = (out_s$shape / out_s$rate) | |
| fit = product_m.default(X, V) | |
| fit_s = product_m.default(X_s, V_s) | |
| abline(0, 1, lty = 2) | |
| A = full_to_onehot(c(4, 2, 2, 2)) | |
| dim(A) | |
| dim(V) | |
| all(X %*% A == X_s) | |
| V2 <- exp(A %*% log(V_s)) | |
| fit2 = product_m.default(X, V2) | |
| plot(df$Freq, fit) | |
| points(df$Freq, fit_s, col = "royalblue", pch = 2) | |
| points(df$Freq, fit2, col = "orangered", pch = 3) | |
| V = V[, order(colMeans(V), decreasing = TRUE)] | |
| V2 = V2[, order(colMeans(V2), decreasing = TRUE)] | |
| plot(V, V2) | |
| abline(0, 1, lty = 2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment