<!-- index.html -->


<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    <title>
        VividDreamer
    </title>

    <link rel="icon" href="assets/favicon.ico">

    <!-- bootstrap 4.4.1 plus the jQuery (slim) and Popper builds it is loaded with; jQuery and Popper stay ahead of bootstrap.min.js -->
    <script src="https://code.jquery.com/jquery-3.4.1.slim.min.js" integrity="sha384-J6qa4849blE2+poT4WnyKhv5vZF5SrPo0iEjwBvKU7imGFAV0wwj1yYfoRSJoZ+n" crossorigin="anonymous"></script>
    <script src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js" integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo" crossorigin="anonymous"></script>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.4.1/dist/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
    <script src="https://cdn.jsdelivr.net/npm/bootstrap@4.4.1/dist/js/bootstrap.min.js" integrity="sha384-wfSDF2E50Y2D1uUdj0O3uMBJnjuUD4Ih7YwaYd1iqfktj0Uod8GCExl3Og8ifwB6" crossorigin="anonymous"></script>
    <!-- icon -->
    <script src="https://kit.fontawesome.com/87dc3e863a.js" crossorigin="anonymous"></script>
    <!-- font -->
    <link href="https://fonts.googleapis.com/css?family=Open+Sans" rel="stylesheet" type="text/css">
    <!-- mathjax: this configuration block has to appear before the script tag that loads MathJax.js -->
    <script type="text/x-mathjax-config">
        MathJax.Hub.Config({
            showProcessingMessages: false,
            messageStyle: "none", // do not show status messages
            extensions: ["tex2jax.js"],
            jax: ["input/TeX", "output/HTML-CSS"],
            tex2jax: {
                inlineMath: [ ['$','$'], ["\\(","\\)"] ],
                displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
                skipTags: ['script', 'noscript', 'style', 'textarea', 'pre','code', 'a', 'annotation', 'annotation-xml'],
                ignoreClass: 'crayon-.*' // 'crayon-'
            },
            'HTML-CSS': {
                showMathMenu: false
            }
        });
        MathJax.Hub.Queue(["Typeset",MathJax.Hub]);
    </script>
    <!-- mathjax loader (async is declared once; the original tag repeated the attribute) -->
    <script async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML"></script>
    <!-- page-wide defaults: white background and the Open Sans face requested above -->
    <style>
        body {
            background: rgb(255, 255, 255) no-repeat fixed top left;
            font-family:'Open Sans', sans-serif;
        }
    </style>

</head>

<body>
  <!-- cover -->
    <section>
        <div class="jumbotron text-center mt-0" style="padding-bottom: 0px;">
            <div class="container-fluid" style="margin-top: -3em;">
                <div class="row">
                    <div class="col">
                        <!-- paper title: the page's top-level heading; the inline font-size fixes its rendered size -->
                        <h1 style="font-size:40px;">
                            <strong>VividDreamer</strong>: Towards High-Fidelity and Efficient Text-to-3D Generation
                        </h1>
                        <!-- publication -->

                        <!-- <h3 style="color:#6e6e6e;"> ICCV 2023 </h3>
                        <hr> -->

                        <!-- authors: each superscript refers to the numbered affiliation listed below.
                             Authors with no homepage URL are plain text; an empty href would reopen this page in a new tab. -->
                        <div>
                            <h4>
                                <a href="https://NarcissusEx.github.io" target="_blank" rel="noopener" style="margin-right:30px"><sup>1</sup>Zixuan Chen</a>
                                <span style="margin-right:30px"><sup>1</sup>Ruijie Su</span>
                                <span style="margin-right:30px"><sup>1</sup>Jiahao Zhu</span>
                                <a href="https://wanggcong.github.io" target="_blank" rel="noopener" style="margin-right:30px"><sup>2</sup>Guangcong Wang</a>
                                <a href="https://zjjconan.github.io" target="_blank" rel="noopener" style="margin-right:30px"><sup>1</sup>Lingxiao Yang</a>
                                <a href="https://cse.sysu.edu.cn/content/2498" target="_blank" rel="noopener" style="margin-right:30px"><sup>1</sup>Jian-Huang Lai</a>
                            </h4>
                            <h4>
                                <span style="margin-right:30px"><sup>3</sup>Shisong Wu</span>
                                <a href="https://cse.sysu.edu.cn/content/2478" target="_blank" rel="noopener" style="margin-right:30px"><sup>1</sup>Xiaohua Xie</a>
                            </h4>
                        </div>
                        <!-- affiliations, numbered to match the author superscripts -->
                        <div style="display: flex;justify-content: center;align-items: center;">
                            <div style="margin: 0 3%;">
                                <h4>
                                    <sup>1</sup>Sun Yat-Sen University
                                </h4>
                            </div>
                            <div style="margin: 0 3%;">
                                <h4>
                                    <sup>2</sup>Great Bay University
                                </h4>
                            </div>
                        </div>
                        <h4>
                            <sup>3</sup>China Southern Power Grid Artificial Intelligence Technology Co., Ltd.
                        </h4>
                        <!-- links: the icons are decorative because each link already carries a text label -->
                        <div style="display: flex;justify-content: center;align-items: center;">
                            <div style="margin: 0 5%;">
                                <a href="https://arxiv.org/abs/2406.14964" target="_blank" rel="noopener">
                                    <img src="assets/paper.png" alt="" style="height: 60px;margin-bottom: 5px;">
                                    <h6><strong>Paper</strong></h6>
                                </a>
                            </div>
                            <div style="margin: 0 5%;">
                                <a href="https://github.com/NarcissusEx/VividDreamer" target="_blank" rel="noopener">
                                    <img src="assets/github.png" alt="" style="height: 60px;margin-bottom: 5px;">
                                    <h6><strong>Code</strong></h6>
                                </a>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </section>

  <!-- abstract -->
    <section>
        <div class="container">
            <div class="row">
                <div class="col-12 text-center">
                <!-- <h3>Abstract</h3> -->
                    <!-- teaser figure with its caption -->
                    <div class="row justify-content-center" style="align-items:center; display:flex;margin-top: -1.5em;">
                        <div> 
                            <img src="assets/Examples.png" alt="VividDreamer" class="img-responsive" width="100%"/>
                        </div>
                        <div>
                            <h4><b>Text-to-3D Examples</b></h4>
                        </div>
                    </div>
                    <hr style="margin-top: 0px;">
                    <!-- <br> -->
                    <!-- training-process video, a three-column row of prompt captions, then the figure caption -->
                    <div class="row justify-content-center" style="align-items:center; display:flex; margin-top: -1em;">
                        <div> 
                            <video autoplay loop playsinline muted style="width: 100%;">
                                <source data-src="assets/process.mp4" type="video/mp4" src="assets/process.mp4">
                            </video>
                            <!-- <video width="100%" autoplay="autoplay" loop="loop" controls>
                                <source src="" type="video/mp4">
                            </video> -->
                        </div>
                        <table style="table-layout: fixed;" width="100%">
                            <tbody>
                                <!-- cells must live inside a row; the original relied on the parser inserting this <tr> -->
                                <tr>
                                    <td width="33%"> 
                                        <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Iron Man</b></span>"</i></p>
                                    </td>
                                    <td width="33%"> 
                                        <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>bichon frise</b></span> <span style="color:#52B19A"><b>wearing academic regalia</b></span>"</i></p>
                                    </td>
                                    <td width="33%"> 
                                        <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>plush dragon toy</b></span>"</i></p>
                                    </td>
                                </tr>
                            </tbody>
                        </table>
                        <div>
                            <h4><b>Training Process</b></h4>
                        </div>
                    </div>
                </div>
                <!-- <p class="text-justify">
                    <b>Examples of text-to-3D asset creations with our framework (a).</b> In this paper, we present <b>VividDreamer</b>, an efficient text-to-3D generation framework that can distill semantically-consistent textures and high-fidelity structures from pretrained 2D diffusion models using a novel <b>Pose-dependent Consistency Distillation Sampling</b> objective in a <b>coarse-to-fine optimization</b> manner, allowing to yield high-fidelity 3D objects (rows 1 and 2) and 3D avatars (row 3) based on the given text prompts. Specifically, <b>VividDreamer</b> achieves high training efficiency, which can create ready-to-use 3D assets within 10 minutes, while producing photorealistic 3D objects within 30 minutes <b>(b)</b>.
                </p> -->
            </div>
        </div>
    </section>

    <!-- abstract -->
    <section>
        <div class="container">
            <div class="row">
                <div class="col-12 text-left">
                    <h3>Abstract</h3>
                </div>
                <p class="text-justify">
                    Text-to-3D generation aims to create 3D assets from text-to-image diffusion models. However, existing methods face an inherent bottleneck in generation quality because the widely-used objectives such as Score Distillation Sampling (SDS) inappropriately omit U-Net jacobians for swift generation, leading to significant bias compared to the “true” gradient obtained by full denoising sampling. This bias brings inconsistent updating direction, resulting in implausible 3D generation (<i>e.g.,</i> color deviation, Janus problem, and semantically inconsistent details). In this work, we propose <b>Pose-dependent Consistency Distillation Sampling</b> (PCDS), a novel yet efficient objective for diffusion-based 3D generation tasks. Specifically, <b>PCDS</b> builds the <b>pose-dependent consistency function</b> within diffusion trajectories, allowing it to approximate true gradients through minimal sampling steps (1~3). Compared to SDS, <b>PCDS</b> can acquire a more accurate updating direction with the same sampling time (1 sampling step), while enabling few-step (2~3) sampling to trade compute for higher generation quality. For efficient generation, we propose a coarse-to-fine optimization strategy, which first utilizes 1-step <b>PCDS</b> to create the basic structure of 3D objects, and then gradually increases <b>PCDS</b> steps to generate fine-grained details. Extensive experiments demonstrate that our approach outperforms the state-of-the-art in generation quality and training efficiency, conspicuously alleviating the implausible 3D generation issues caused by the deviated updating direction. Specifically, <b>VividDreamer</b> can create ready-to-use 3D assets within 10 minutes, while producing photorealistic 3D objects within 30 minutes. Moreover, it can be simply applied to many 3D generative applications to yield impressive 3D assets, such as <b>3D portrait and avatar generation</b> and <b>text-to-3D editing</b>.
                </p>
            </div>
        </div>
    </section>
    <br>

  <!-- <section>
    <div class="container">
      <div class="row">
        <div class="col-12 text-center">
          <h3>Preliminary</h3>
          <div class="row justify-content-center" style="align-items:center; display:flex;">
            <div> 
              <img src="assets/preliminary.png" alt="cunerf" class="img-responsive" width="100%"/>
            </div>
          </div>
          <br>
        </div>
          <p class="text-justify" style="margin-top:-30px">
            Supervised MISR methods <b>(a)</b> need to collect considerable LR-HR pairs for training, while zero-shot MISR <b>(b)</b> and our <b>CuNeRF</b> <b>(c)</b> only train the model on each test volume itself.
            However, given a test volume, ZSMISR methods can only upsample medical images at a specific scale (one-for-one), while our <b>CuNeRF</b> can handle arbitrary upsampling scales (one-for-all). 
          </p>
        </div>
      </div>
    </div>
  </section>
  <br> -->

    <!-- <section>
        <div class="container">
            <div class="row">
                <div class="col-12 text-left">
                    <h3>Motivation</h3> 
                    <div class="row justify-content-center" style="align-items:center; display:flex;">
                        <div> 
                            <img src="assets/difference.png" alt="VividDreamer" class="img-responsive" width="100%"/>
                        </div>
                    </div>
                    <br>
                </div>
                <p class="text-justify" style="margin-top:-20px">
                    <b>Examples of different objectives. </b>Visually, the acquisition of ''true'' gradient <b>(a)</b> is a time-consuming work, requiring the full denoising sampling in each iteration. To skip such a lengthy process, Score Distillation Sampling (SDS) <b>(b)</b> directly maps the noise to data (<i>i.e.,</i> <b>pseudoGTs</b>) using 1-step DDPM sampling, but SDS struggles to acquire accurate gradients due to the intrinsic randomness brought by DDPM. On the contrary, our <b>PCDS</b> builds the <b>pose-dependent consistency function</b> $f_\phi$ from any timestep $t$ to the origin 0 within diffusion trajectories, allowing to generate accurate <b>pseudoGTs</b> and acquire precise gradients via minimal sampling steps (1~3).
                </p>
            </div>
        </div>
    </section>
    <br> -->

    <!-- <section>
        <div class="container">
            <div class="row">
                <div class="col-12 text-center">
                    <h3>Framework</h3>
                    <div class="row justify-content-center" style="align-items:center; display:flex;">
                        <div> 
                        <img src="assets/framework.png" alt="cunerf" class="img-responsive" width="100%"/>
                        </div>
                    </div>
                    <br>
                </div>
                <p class="text-justify">
                    The overall framework of our <b>CuNeRF</b>.
                    To synthesize a pixel (<font color="red">red circle</font>) with the spatial position $\mathbf{x}_t=(x, y, z)$, <b>(a)</b> <b>CuNeRF</b> first uniformly samples $N$ points as a point set $\{\hat{\mathbf{x}}_i\}^N_{i=1}$ within the cube space (<font color="orchid">purple cube</font>) centered by $\mathbf{x}_t$.
                    Then, <b>CuNeRF</b> obtains the coarse estimation (<font color="blue">blue cube</font>) by feeding the sampling points into an MLP $F_{\Theta}$ to produce the set of corresponding pixel intensity $\{c_i\}_{i=1}^N$ and volume density $\{\sigma_i\}_{i=1}^N$.    
                    <b>(b)</b> Subsequently, assuming $\sigma$ of each sampling point is only related to the distance with the cube center $\mathbf{x}_t$, <b>CuNeRF</b> computes the coarse output of the target pixel via volume integral.
                    <b>(c)</b> Finally, <b>CuNeRF</b> resamples the points under the probability density function (PDF) of coarse estimation to acquire the fine estimation (<font color="orange">orange cube</font>) of the cube.
                    The fine output is generated by the same procedures as <b>(b)</b>.
                    Since these two rendering functions are differentiable, <b>CuNeRF</b> can be optimized by minimizing the rendering loss $\mathcal{L}_{A}$.
                    The fine output is the final rendering result of the target spatial position $\mathbf{x}_t$.
                </p>
            </div>
        </div>
    </section> -->
    <!-- <br> -->


    <!-- comparison results -->
    <section>
        <div class="container">
            <div class="row">
                <div class="text-left">
                    <!-- <hr style="margin-top:0px"> -->
                    <h3>Visual Comparisons</h3>
                </div>
                <div class="text-center">
                    <table width="100%" style="table-layout: fixed;">
                        <!-- caption row: one method name per column with the time shown beneath it -->
                        <tbody>
                            <tr>
                                <!-- blank leading cell (15% wide) -->
                                <td width="15%"></td>
                                <td width="21%">
                                    <p style="font-size: larger;margin-bottom: 0em;">Stable DreamFusion</p>
                                    <p style="font-size: large;">(~1h)</p>
                                </td>
                                <!-- <td width="17%"> 
                                    <p style="font-size: larger;margin-bottom: 0em;">DreamGaussian</p><p style="font-size: large;">(~3mins)</p>
                                </td> -->
                                <td width="21%">
                                    <p style="font-size: larger;margin-bottom: 0em;">GaussianDreamer</p>
                                    <p style="font-size: large;">(~9mins)</p>
                                </td>
                                <td width="21%">
                                    <p style="font-size: larger;margin-bottom: 0em;">LucidDreamer</p>
                                    <p style="font-size: large;">(~45mins)</p>
                                </td>
                                <td width="21%">
                                    <p style="font-size: larger;margin-bottom: 0em;"><b>VividDreamer (Ours)</b></p>
                                    <p style="font-size: large;">(~30mins)</p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </div>  
                <div class="text-center">
                    <table>
                        <!-- one row per prompt: prompt text in the first cell (15%), looping muted video in the second (85%) -->
                        <tbody>
                            <!-- chow chow puppy -->
                            <tr>
                                <td width="15%">
                                    <p style="font-size: larger;"><i>"A DSLR photo of a <span style="color:#EA700D"><b>chow chow puppy</b></span>"</i></p>
                                </td>
                                <td width="85%">
                                    <video autoplay loop playsinline muted style="width: 100%;">
                                        <source type="video/mp4" src="assets/comp/a_DSLR_photo_of_a_chow_chow_puppy.mp4">
                                    </video>
                                </td>
                            </tr>
                            <!-- wizard raccoon -->
                            <tr>
                                <td width="15%">
                                    <p style="font-size: larger;"><i>"A zoomed out DSLR photo of a <span style="color:#EA700D"><b>wizard raccoon</b></span> <span style="color:#52B19A"><b>casting a spell</b></span>"</i></p>
                                </td>
                                <td width="85%">
                                    <video autoplay loop playsinline muted style="width: 100%;">
                                        <source type="video/mp4" src="assets/comp/a_zoomed_out_DSLR_photo_of_a_wizard_raccoon_casting_a_spell.mp4">
                                    </video>
                                </td>
                            </tr>
                            <!-- baby dragon: the caption is display text, so it uses a space where the file name has an underscore -->
                            <tr>
                                <td width="15%">
                                    <p style="font-size: larger;"><i>"A DSLR photo of a <span style="color:#EA700D"><b>baby dragon</b></span> <span style="color:#52B19A"><b>drinking boba</b></span>"</i></p>
                                </td>
                                <td width="85%">
                                    <video autoplay loop playsinline muted style="width: 100%;">
                                        <source type="video/mp4" src="assets/comp/a_DSLR_photo_of_a_baby_dragon_drinking_boba.mp4">
                                    </video>
                                </td>
                            </tr>
                            <!-- tray of sushi -->
                            <tr>
                                <td width="15%">
                                    <p style="font-size: larger;"><i>"A DSLR photo of <span style="color:#EA700D"><b>a tray of Sushi</b></span> <span style="color:#52B19A"><b>containing pugs</b></span>"</i></p>
                                </td>
                                <td width="85%">
                                    <video autoplay loop playsinline muted style="width: 100%;">
                                        <source type="video/mp4" src="assets/comp/a_DSLR_photo_of_a_tray_of_Sushi_containing_pugs.mp4">
                                    </video>
                                </td>
                            </tr>
                            <!-- goat -->
                            <tr>
                                <td width="15%">
                                    <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>goat</b></span> <span style="color:#52B19A"><b>drinking beer</b></span>"</i></p>
                                </td>
                                <td width="85%">
                                    <video autoplay loop playsinline muted style="width: 100%;">
                                        <source type="video/mp4" src="assets/comp/a_goat_drinking_beer.mp4">
                                    </video>
                                </td>
                            </tr>
                            <!-- terracotta bunny -->
                            <tr>
                                <td width="15%">
                                    <p style="font-size: larger;"><i>"A DSLR photo of a <span style="color:#EA700D"><b>terracotta bunny</b></span>"</i></p>
                                </td>
                                <td width="85%">
                                    <video autoplay loop playsinline muted style="width: 100%;">
                                        <source type="video/mp4" src="assets/comp/a_DSLR_photo_of_a_terracotta_bunny.mp4">
                                    </video>
                                </td>
                            </tr>
                            <!-- <tr>
                                <td width="15%">
                                    <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>capybara</b></span> <span style="color:#52B19A"><b>wearing a top hat</b></span>, <span style="color:#52B19A"><b>low poly</b></span>"</i></p>
                                </td>
                                <td width="85%">
                                    <video autoplay loop playsinline muted style="width: 100%;">
                                        <source type="video/mp4" src="assets/comp/a_capybara_wearing_a_top_hat_low_poly.mp4">
                                    </video>
                                </td>
                            </tr> -->
                            <!-- <tr>
                                <td width="15%">
                                    
                                </td>
                                <td width="75%">
                                    
                                </td>
                            </tr> -->
                        </tbody>
                    </table>
                    <hr>
                </div>
                <br>
                <div class="text-left">
                    <h3>Visual Comparisons (~10 minutes)</h3>
                </div>
                <div class="text-center">
                    <table width="100%" style="table-layout: fixed;">
                        <!-- caption row: method names with the time shown beneath each, between blank outer cells -->
                        <tbody>
                            <tr>
                                <!-- blank leading cell -->
                                <td width="25%"></td>
                                <td width="20%">
                                    <p style="font-size: larger;margin-bottom: 0em;">GaussianDreamer (SDS)</p>
                                    <p style="font-size: large;">(~9mins)</p>
                                </td>
                                <td width="20%">
                                    <p style="font-size: larger;margin-bottom: 0em;">LucidDreamer (ISM)</p>
                                    <p style="font-size: large;">(~10mins)</p>
                                </td>
                                <td width="20%">
                                    <p style="font-size: larger;margin-bottom: 0em;"><b>VividDreamer (Ours)</b></p>
                                    <p style="font-size: large;">(~10mins)</p>
                                </td>
                                <!-- blank trailing cell -->
                                <td width="15%"></td>
                            </tr>
                        </tbody>
                    </table>
                </div>
                <div class="text-center">
                    <table>
                        <!-- each row: blank cell (5%), prompt text (20%), looping muted video (60%), blank cell (15%) -->
                        <tbody>
                            <!-- pig wearing a backpack -->
                            <tr>
                                <td width="5%"></td>
                                <td width="20%">
                                    <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>pig</b></span> <span style="color:#52B19A"><b>wearing a backpack</b></span>"</i></p>
                                </td>
                                <td width="60%">
                                    <video muted autoplay loop playsinline style="width: 100%;">
                                        <source src="assets/comp/a_pig_wearing_a_backpack.mp4" type="video/mp4">
                                    </video>
                                </td>
                                <td width="15%"></td>
                            </tr>
                            <!-- corgi puppy -->
                            <tr>
                                <td width="5%"></td>
                                <td width="20%">
                                    <p style="font-size: larger;"><i>"A DSLR photo of a <span style="color:#EA700D"><b>corgi puppy</b></span>"</i></p>
                                </td>
                                <td width="60%">
                                    <video muted autoplay loop playsinline style="width: 100%;">
                                        <source src="assets/comp/a_DSLR_photo_of_a_corgi_puppy.mp4" type="video/mp4">
                                    </video>
                                </td>
                                <td width="15%"></td>
                            </tr>
                            <!-- yellow schoolbus -->
                            <tr>
                                <td width="5%"></td>
                                <td width="20%">
                                    <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>yellow schoolbus</b></span>"</i></p>
                                </td>
                                <td width="60%">
                                    <video muted autoplay loop playsinline style="width: 100%;">
                                        <source src="assets/comp/a_yellow_schoolbus.mp4" type="video/mp4">
                                    </video>
                                </td>
                                <td width="15%"></td>
                            </tr>
                        </tbody>
                    </table>
                </div>
            </div>
            <hr>
        </div>
    </section>

    <section>
        <div class="container">
            <div class="row">
                <div class="col-12 text-left">
                    <h3>More Generated Results</h3>
                </div>
                <div class="text-center">
                    <!-- the video precedes the caption table: a video element is not permitted as a direct child of a table,
                         and browsers already relocate it to this position while parsing -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/visual/0.mp4">
                    </video>
                    <!-- four equal-width prompt captions; the negative top margin pulls them up toward the video -->
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <tr>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>piglet</b></span> <span style="color:#52B19A"><b>sitting in a teacup</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A <span style="color:#EA700D"><b>red panda</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>shiba inu</b></span> <span style="color:#52B19A"><b>wearing golf clothes and hat</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>cocker spaniel</b></span> <span style="color:#52B19A"><b>wearing a crown</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <!-- the video precedes the caption table: a video element is not permitted as a direct child of a table,
                         and browsers already relocate it to this position while parsing -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/visual/1.mp4">
                    </video>
                    <!-- four equal-width prompt captions; the negative top margin pulls them up toward the video -->
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <tr>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A zoomed out DSLR photo of a <span style="color:#EA700D"><b>corgi</b></span> <span style="color:#52B19A"><b>wearing a top hat</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>squirrel</b></span> <span style="color:#52B19A"><b>dressed like a clown</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A zoomed out DSLR photo of a <span style="color:#EA700D"><b>kingfisher bird</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>mandarin duck</b></span> <span style="color:#52B19A"><b>swimming in a pond</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <!-- the video precedes the caption table: a video element is not permitted as a direct child of a table,
                         and browsers already relocate it to this position while parsing -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/visual/2.mp4">
                    </video>
                    <!-- four equal-width prompt captions; the negative top margin pulls them up toward the video -->
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <tr>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A <span style="color:#EA700D"><b>plush toy</b></span> of a <span style="color:#52B19A"><b>corgi nurse</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A <span style="color:#EA700D"><b>plush dragon toy</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>hippo</b></span> <span style="color:#52B19A"><b>wearing a sweater</b></span>"</i></p>
                                </td>
                                <td width="25%"> 
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A zoomed out DSLR photo of a <span style="color:#EA700D"><b>lion's mane jellyfish</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <!-- Clip 3 with its four prompt captions. The <video> is written ahead of the table
                         because it is not permitted table content; browsers relocate it there anyway. -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/visual/3.mp4">
                    </video>
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <!-- One caption per quarter-width column; the negative top margin pulls each caption up toward the clip. -->
                            <tr>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>robot dinosaur</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#52B19A"><b>shiny silver</b></span> <span style="color:#EA700D"><b>robot_cat</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>hippo</b></span> <span style="color:#52B19A"><b>made out of chocolate</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-2.5em"><i>"A DSLR photo of an <span style="color:#EA700D"><b>origami hippo</b></span> <span style="color:#52B19A"><b>in a river</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <!-- Clip 4 with its four prompt captions. The <video> is written ahead of the table
                         because it is not permitted table content; browsers relocate it there anyway. -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/visual/4.mp4">
                    </video>
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <!-- One caption per quarter-width column; the negative top margin pulls each caption up toward the clip. -->
                            <tr>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-3.5em"><i>"An <span style="color:#EA700D"><b>airplane</b></span> <span style="color:#52B19A"><b>made out of wood</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-3.5em"><i>"A <span style="color:#EA700D"><b>Panther De Ville car</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-3.5em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>steam engine train</b></span>, <span style="color:#52B19A"><b>high resolution</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-3.5em"><i>"A DSLR photo of an <span style="color:#EA700D"><b>amigurumi motorcycle</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <!-- Clip 5 with its four prompt captions. The <video> is written ahead of the table
                         because it is not permitted table content; browsers relocate it there anyway. -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/visual/5.mp4">
                    </video>
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <!-- One caption per quarter-width column; the negative top margin pulls each caption up toward the clip. -->
                            <tr>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"A delicious <span style="color:#EA700D"><b>chocolate brownie dessert</b></span> with <span style="color:#52B19A"><b>ice cream</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"A DSLR photo of an <span style="color:#EA700D"><b>ice cream sundae</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"A DSLR photo of <span style="color:#EA700D"><b>spaghetti and meatballs</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"<span style="color:#EA700D"><b>Tower Bridge</b></span> <span style="color:#52B19A"><b>made out of gingerbread and candy</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <!-- Clip 6 with its four prompt captions. The <video> is written ahead of the table
                         because it is not permitted table content; browsers relocate it there anyway. -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/visual/6.mp4">
                    </video>
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <!-- One caption per quarter-width column; the negative top margin pulls each caption up toward the clip. -->
                            <tr>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"A <span style="color:#EA700D"><b>20-sided die</b></span> <span style="color:#52B19A"><b>made out of glass</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"A DSLR photo of a <span style="color:#EA700D"><b>football helmet</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"A DSLR photo of the <span style="color:#EA700D"><b>leaning tower of Pisa</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;margin-top:-1em"><i>"An <span style="color:#EA700D"><b>erupting volcano</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </div>
            </div>
            <hr>
        </div>
    </section>

    <!-- Applications: portrait generation, avatar generation and text-to-3D editing.
         Every clip is written ahead of its caption table: <video> is not permitted table
         content, and browsers relocate such elements in front of the table anyway. -->
    <section>
        <div class="container">
            <div class="row">
                <div class="col-12 text-left">
                    <h3>Applications</h3>
                </div>
                <div class="col-12 text-center">
                    <h4>3D Portrait Generation</h4>
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/avatar/0.mp4">
                    </video>
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <!-- One prompt caption per quarter-width column. -->
                            <tr>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>boy</b></span> <span style="color:#52B19A"><b>with facial painting</b></span>, head, HDR, photorealistic, 8K"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Barack Obama</b></span>, head, HDR, photorealistic, 8K"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"Portrait of <span style="color:#EA700D"><b>young norwegian woman</b></span>, <span style="color:#52B19A"><b>steampunk, long hair</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Robert Pattinson</b></span>, head, HDR, photorealistic, 8K"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <hr>
                    <h4>3D Avatar Generation</h4>
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/avatar/1.mp4">
                    </video>
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <tr>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Iron man</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Ant man</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Bat man</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Groot</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/avatar/2.mp4">
                    </video>
                    <table style="table-layout: fixed;" width="100%">
                        <tbody>
                            <tr>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Sun Wukong</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"<span style="color:#EA700D"><b>Captain Marvel</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>young man</b></span> <span style="color:#52B19A"><b>wearing a turtleneck</b></span>"</i></p>
                                </td>
                                <td width="25%">
                                    <p style="font-size: larger;"><i>"A <span style="color:#EA700D"><b>Mediterranean</b></span> <span style="color:#52B19A"><b>with beard wearing white linen shirt</b></span>"</i></p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                    <hr>
                    <h4>Text-to-3D Editing</h4>
                    <!-- The clip, image and rule are siblings of the table rather than its children: none of
                         them is valid table content. The empty table is kept only as a spacer, because its
                         negative bottom margin still affects the spacing below this block. -->
                    <video autoplay loop playsinline muted style="width: 100%;">
                        <source type="video/mp4" src="assets/editing/0.mp4">
                    </video>
                    <!-- NOTE(review): alt text is derived from the section heading; refine it to describe the image's actual content. -->
                    <img src="assets/editing/edit.png" alt="Text-to-3D editing examples" width="100%" style="margin-top: -3.5em; margin-bottom: -1em;">
                    <hr style="margin-top:0px">
                    <table style="table-layout: fixed;margin-bottom: -4em;"></table>
                </div>
            </div>

        </div>
    </section>
    <br>
    <br>
    <br>
  <!-- citing -->
    <div class="container">
        <div class="row">
            <div class="col-12">
                <h3>Citation</h3>
                <!-- BibTeX entry. <pre> preserves whitespace, so the entry stays flush-left and the
                     closing tags follow the final brace directly to avoid a trailing blank line. -->
                <pre style="background-color: #e9eeef;padding: 1.25em 1.5em">
<code>@article{chen2024vividdreamer,
    title={VividDreamer: Towards High-Fidelity and Efficient Text-to-3D Generation},
    author={Chen, Zixuan and Su, Ruijie and Zhu, Jiahao and Yang, Lingxiao and Lai, Jian-Huang and Xie, Xiaohua},
    journal={arXiv preprint arXiv:2406.14964},
    year={2024}
}</code></pre>
            </div>
        </div>
    </div>
    <br>
    <!-- journal={arXiv preprint arXiv:2305.18766}, -->
    <div class="container">
        <div class="row">
            <div class="col-12">
                <h3>Acknowledgements</h3>
                <hr style="margin-top:0px">
                <!-- The paragraph lives inside the column so it shares the heading's horizontal padding;
                     a direct child of .row would not be aligned with the heading above it. -->
                <p class="text-justify">
                    This project is supported by the Natural Science Foundation of China (No. 62072482), and is also supported by the Project of Guangdong Provincial Key Laboratory of Information Security Technology (Grant No. 2023B1212060026).<br>
                    We also thank <a href="https://lioryariv.github.io/" target="_blank" rel="noopener">Lior Yariv</a> for the <a href="https://lioryariv.github.io/idr/" target="_blank" rel="noopener">website template</a>.
                </p>
            </div>
        </div>
    </div>
    <!-- Visitor tracker: a linked ClustrMaps map image, centred at the bottom of the page. -->
    <section>
        <div class="container">
            <div class="row">
                <div class="col">
                    <div class="row justify-content-center" style="text-align:center; padding:0 0 30px 0">
                        <!-- The ampersand in the query string is escaped as &amp; inside the attribute; the alt text names the link target. -->
                        <a href="https://clustrmaps.com/site/1c39c" title="Visit tracker"><img src="https://www.clustrmaps.com/map_v2.png?d=B9FjzYCSBROZZ7ALYpj-UbKTWzReXLKIazwG1wWsr8E&amp;cl=ffffff" alt="ClustrMaps visitor map"></a>
                    </div>
                </div>
            </div>
        </div>
    </section>
</body>
</html>