In this work, we introduce a novel deep learning architecture, Variable Length Embeddings (VLEs), an autoregressive model that can produce a latent representation composed of an arbitrary number of tokens. As a proof of concept, we demonstrate the capabilities of VLEs on tasks that involve reconstruction and image decomposition. We evaluate our experiments on a mix of the iNaturalist and ImageNet datasets and find that VLEs achieve comparable reconstruction results to a state-of-the-art VAE, using less than a tenth of the parameters.
% arXiv preprint: the identifier is stored in the eprint fields (not in
% `journal`) so that styles can format and link it themselves.
@misc{arxiv.2305.09967,
  author        = {Chiu, Johnathan and Gu, Andi and Zhou, Matt},
  title         = {Variable Length Embeddings},
  year          = {2023},
  eprint        = {2305.09967},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2305.09967},
}