abikoushi/overparametrize_withoutintercept.R

## overparametrize_withoutintercept.R
library(moltenNMF)
library(Matrix)

df = expand.grid(letters[1:3], LETTERS[1:2])
X = sparse_onehot(~., data = df)

A = matrix(c(1,0,0,0,
             0,1,0,0,
             0,0,1,0,
             0,0,0,0,
             0,0,0,1), byrow = TRUE, nrow = 5)

print(A)

Xs = sparse.model.matrix(~.-1, data = df)
print(all(X %*% A == Xs))

B = matrix(c(1,0,0,1,0,
             0,1,0,1,0,
             0,0,1,1,0,
             0,0,0,-1,1), byrow = TRUE, nrow = 4)


print(B)
print(all(Xs %*% B == X))

clen = c(4, 2, 2)
full_to_onehot <- function(clen){
  n_full = sum(clen - 1) + 1
  n_onehot = sum(clen)
  B = matrix(0, n_full, n_onehot)
  B[1:clen[1],1:clen[1]] <- diag(clen[1])
  cumlen <- cumsum(clen)
  cumlen_shift <- cumsum(c(clen[1],clen[-1]-1) )
  for(k in seq_len(length(cumlen)-1)){
    B[1:cumlen_shift[1],cumlen[k]+1] <- 1
    for(i in (cumlen[k]+1):cumlen[k+1]){
      B[cumlen_shift[k]+1,i] <- 1
    }
    B[cumlen_shift[k]+1,cumlen[k]+1] <- -1
  }
  return(B)
}


df = expand.grid(x1=letters[1:4], x2=LETTERS[1:2], x3=factor(1:2))
Xs = sparse.model.matrix(~x1+x2+x3-1, data = df)
A = full_to_onehot(c(4, 2, 2))

###

set_attr_modelmat <- function(X) {
  if (!is.null(attr(X, "assign"))) {
    attr(X, "indices") <- c(0L, cumsum(rle(attr(X, "assign"))$lengths))
  }
  labs <- names(attr(X, "contrasts"))
  if (!is.null(labs)) {
    attr(X, "term.labels") <- labs
    if (!is.null(X@Dimnames[[2]])) {
      attr(X, "value.labels") <- sub(
        paste(labs, collapse = "|"),
        "",
        X@Dimnames[[2]]
      )
    }
  }
  return(X)
}

df = as.data.frame(Titanic)

X_s = sparse_onehot(Freq ~ Class + Sex + Age + Survived, data = df) #冗長なワンホット

L = 2
system.time({
  out_s <- mNMF_vb.default(df$Freq, X = X_s, L = L, iter = 1000)
})


X = sparse.model.matrix(Freq ~ Class + Sex + Age + Survived-1, data = df) #フルランク
X = set_attr_modelmat(X)

system.time({
  out <- mNMF_vb.default(df$Freq, X = X, L = L, iter = 1000)
})

plot(out_s$ELBO[-1], type = "l", lty = 2)
lines(out$ELBO[-1], type = "l")

V = (out$shape / out$rate)
V_s = (out_s$shape / out_s$rate)

fit = product_m.default(X, V)
fit_s = product_m.default(X_s, V_s)


abline(0, 1, lty = 2)


A = full_to_onehot(c(4, 2, 2, 2))
dim(A)
dim(V)

all(X %*% A == X_s)

V2 <- exp(A %*% log(V_s))
fit2 = product_m.default(X, V2)
plot(df$Freq, fit)
points(df$Freq, fit_s, col = "royalblue", pch = 2)
points(df$Freq, fit2, col = "orangered", pch = 3)

V = V[, order(colMeans(V), decreasing = TRUE)]
V2 = V2[, order(colMeans(V2), decreasing = TRUE)]
plot(V, V2)
abline(0, 1, lty = 2)
	library(moltenNMF)
	library(Matrix)

	df = expand.grid(letters[1:3], LETTERS[1:2])
	X = sparse_onehot(~., data = df)

	A = matrix(c(1,0,0,0,
	0,1,0,0,
	0,0,1,0,
	0,0,0,0,
	0,0,0,1), byrow = TRUE, nrow = 5)

	print(A)

	Xs = sparse.model.matrix(~.-1, data = df)
	print(all(X %*% A == Xs))

	B = matrix(c(1,0,0,1,0,
	0,1,0,1,0,
	0,0,1,1,0,
	0,0,0,-1,1), byrow = TRUE, nrow = 4)


	print(B)
	print(all(Xs %*% B == X))

	clen = c(4, 2, 2)
	full_to_onehot <- function(clen){
	n_full = sum(clen - 1) + 1
	n_onehot = sum(clen)
	B = matrix(0, n_full, n_onehot)
	B[1:clen[1],1:clen[1]] <- diag(clen[1])
	cumlen <- cumsum(clen)
	cumlen_shift <- cumsum(c(clen[1],clen[-1]-1) )
	for(k in seq_len(length(cumlen)-1)){
	B[1:cumlen_shift[1],cumlen[k]+1] <- 1
	for(i in (cumlen[k]+1):cumlen[k+1]){
	B[cumlen_shift[k]+1,i] <- 1
	}
	B[cumlen_shift[k]+1,cumlen[k]+1] <- -1
	}
	return(B)
	}


	df = expand.grid(x1=letters[1:4], x2=LETTERS[1:2], x3=factor(1:2))
	Xs = sparse.model.matrix(~x1+x2+x3-1, data = df)
	A = full_to_onehot(c(4, 2, 2))

	###

	set_attr_modelmat <- function(X) {
	if (!is.null(attr(X, "assign"))) {
	attr(X, "indices") <- c(0L, cumsum(rle(attr(X, "assign"))$lengths))
	}
	labs <- names(attr(X, "contrasts"))
	if (!is.null(labs)) {
	attr(X, "term.labels") <- labs
	if (!is.null(X@Dimnames[[2]])) {
	attr(X, "value.labels") <- sub(
	paste(labs, collapse = "\|"),
	"",
	X@Dimnames[[2]]
	)
	}
	}
	return(X)
	}

	df = as.data.frame(Titanic)

	X_s = sparse_onehot(Freq ~ Class + Sex + Age + Survived, data = df) #冗長なワンホット

	L = 2
	system.time({
	out_s <- mNMF_vb.default(df$Freq, X = X_s, L = L, iter = 1000)
	})


	X = sparse.model.matrix(Freq ~ Class + Sex + Age + Survived-1, data = df) #フルランク
	X = set_attr_modelmat(X)

	system.time({
	out <- mNMF_vb.default(df$Freq, X = X, L = L, iter = 1000)
	})

	plot(out_s$ELBO[-1], type = "l", lty = 2)
	lines(out$ELBO[-1], type = "l")

	V = (out$shape / out$rate)
	V_s = (out_s$shape / out_s$rate)

	fit = product_m.default(X, V)
	fit_s = product_m.default(X_s, V_s)


	abline(0, 1, lty = 2)


	A = full_to_onehot(c(4, 2, 2, 2))
	dim(A)
	dim(V)

	all(X %*% A == X_s)

	V2 <- exp(A %*% log(V_s))
	fit2 = product_m.default(X, V2)
	plot(df$Freq, fit)
	points(df$Freq, fit_s, col = "royalblue", pch = 2)
	points(df$Freq, fit2, col = "orangered", pch = 3)

	V = V[, order(colMeans(V), decreasing = TRUE)]
	V2 = V2[, order(colMeans(V2), decreasing = TRUE)]
	plot(V, V2)
	abline(0, 1, lty = 2)
No results found