@comment{
  Publication list: rendered citation text interleaved with its BibTeX record.
  Text outside an entry is ignored by BibTeX, so the rendered lines are kept
  verbatim next to each record. Each record has one field per line. Words in
  titles that must keep their capitalisation are wrapped in braces so that
  sentence-casing bibliography styles do not lowercase them.
}

2024

CaraServe: CPU-Assisted and Rank-Aware LoRA Serving for Generative LLM Inference
Suyi Li, Hanfeng Lu, Tianyuan Wu, Minchen Yu, Qizhen Weng, Xusheng Chen, Yizhou Shan, Binhang Yuan, and Wei Wang
arXiv preprint arXiv:2401.11240, 2024
Bib HTML PDF

@comment{
  li2024caraserve: the journal string is kept for styles that ignore eprint
  fields; eprint and archiveprefix expose the same arXiv identifier in
  machine-readable form.
}
@article{li2024caraserve,
  title         = {{CaraServe}: {CPU}-Assisted and Rank-Aware {LoRA} Serving for Generative {LLM} Inference},
  author        = {Li, Suyi and Lu, Hanfeng and Wu, Tianyuan and Yu, Minchen and Weng, Qizhen and Chen, Xusheng and Shan, Yizhou and Yuan, Binhang and Wang, Wei},
  journal       = {arXiv preprint arXiv:2401.11240},
  publisher     = {arXiv},
  year          = {2024},
  eprint        = {2401.11240},
  archiveprefix = {arXiv},
}

2023

Beware of Fragmentation: Scheduling GPU-Sharing Workloads with Fragmentation Gradient Descent
Qizhen Weng, Lingyun Yang, Yinghao Yu, Wei Wang, Xiaochuan Tang, Guodong Yang, and Liping Zhang
In 2023 USENIX Annual Technical Conference (ATC), 2023
Bib HTML PDF Code Poster Slides

@inproceedings{weng2023FGD,
  title     = {Beware of Fragmentation: Scheduling {GPU}-Sharing Workloads with Fragmentation Gradient Descent},
  author    = {Weng, Qizhen and Yang, Lingyun and Yu, Yinghao and Wang, Wei and Tang, Xiaochuan and Yang, Guodong and Zhang, Liping},
  booktitle = {2023 {USENIX} Annual Technical Conference (ATC)},
  publisher = {{USENIX} Association},
  year      = {2023},
  isbn      = {978-1-939133-35-9},
  address   = {Boston, MA},
  pages     = {995--1008},
  url       = {https://www.usenix.org/conference/atc23/presentation/weng},
}

2022

MLaaS in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous GPU Clusters
Qizhen Weng, Wencong Xiao, Yinghao Yu, Wei Wang, Cheng Wang, Jian He, Yong Li, Liping Zhang, Wei Lin, and Yu Ding
In 19th USENIX Symposium on Networked Systems Design and Implementation (NSDI), 2022
Bib HTML PDF Code Slides

@inproceedings{weng2022MLaaS,
  title     = {{MLaaS} in the Wild: Workload Analysis and Scheduling in Large-Scale Heterogeneous {GPU} Clusters},
  author    = {Weng, Qizhen and Xiao, Wencong and Yu, Yinghao and Wang, Wei and Wang, Cheng and He, Jian and Li, Yong and Zhang, Liping and Lin, Wei and Ding, Yu},
  booktitle = {19th {USENIX} Symposium on Networked Systems Design and Implementation (NSDI)},
  pages     = {945--960},
  year      = {2022},
}

Workload consolidation in Alibaba clusters: the good, the bad, and the ugly
Yongkang Zhang, Yinghao Yu, Wei Wang, Qiukai Chen, Jie Wu, Zuowei Zhang, Jiang Zhong, Tianchen Ding, Qizhen Weng, Lingyun Yang, and 4 more authors
In 13th ACM Symposium on Cloud Computing (SoCC), 2022
Bib HTML PDF Slides

@inproceedings{zhang2022Workload,
  title     = {Workload consolidation in {Alibaba} clusters: the good, the bad, and the ugly},
  author    = {Zhang, Yongkang and Yu, Yinghao and Wang, Wei and Chen, Qiukai and Wu, Jie and Zhang, Zuowei and Zhong, Jiang and Ding, Tianchen and Weng, Qizhen and Yang, Lingyun and Wang, Cheng and He, Jian and Yang, Guodong and Zhang, Liping},
  booktitle = {13th ACM Symposium on Cloud Computing (SoCC)},
  pages     = {210--225},
  year      = {2022},
}

2021

Accelerating Distributed Learning in Non-Dedicated Environments
Chen Chen, Qizhen Weng, Wei Wang, Baochun Li, and Bo Li
IEEE Transactions on Cloud Computing (TCC), 2021
Bib HTML PDF

@article{chen2021Accelerating,
  title     = {Accelerating Distributed Learning in Non-Dedicated Environments},
  author    = {Chen, Chen and Weng, Qizhen and Wang, Wei and Li, Baochun and Li, Bo},
  journal   = {IEEE Transactions on Cloud Computing (TCC)},
  year      = {2021},
  publisher = {IEEE},
}

2020

Metis: Learning to schedule long-running applications in shared container clusters at scale
Luping Wang, Qizhen Weng, Wei Wang, Chen Chen, and Bo Li
In International Conference for High Performance Computing, Networking, Storage and Analysis (SC), 2020
Bib HTML PDF Code Slides

@inproceedings{wang2020Metis,
  title     = {Metis: Learning to schedule long-running applications in shared container clusters at scale},
  author    = {Wang, Luping and Weng, Qizhen and Wang, Wei and Chen, Chen and Li, Bo},
  booktitle = {International Conference for High Performance Computing, Networking, Storage and Analysis (SC)},
  pages     = {1--17},
  year      = {2020},
  publisher = {IEEE},
}

Semi-dynamic load balancing: efficient distributed learning in non-dedicated environments
Chen Chen, Qizhen Weng, Wei Wang, Baochun Li, and Bo Li
In 11th ACM Symposium on Cloud Computing (SoCC), 2020
Bib HTML PDF

@inproceedings{chen2020Semi,
  title     = {Semi-dynamic load balancing: efficient distributed learning in non-dedicated environments},
  author    = {Chen, Chen and Weng, Qizhen and Wang, Wei and Li, Baochun and Li, Bo},
  booktitle = {11th ACM Symposium on Cloud Computing (SoCC)},
  pages     = {431--446},
  year      = {2020},
}

2019

APSys
Towards framework-independent, non-intrusive performance characterization for dataflow computation
Huangshi Tian, Qizhen Weng, and Wei Wang
In Proceedings of the 10th ACM SIGOPS Asia-Pacific Workshop on Systems (APSys), 2019
Bib HTML PDF

@inproceedings{tian2019Towards,
  title     = {Towards framework-independent, non-intrusive performance characterization for dataflow computation},
  author    = {Tian, Huangshi and Weng, Qizhen and Wang, Wei},
  booktitle = {Proceedings of the 10th ACM SIGOPS Asia-Pacific Workshop on Systems (APSys)},
  pages     = {54--60},
  year      = {2019},
}

2018

SoCC
Fast distributed deep learning via worker-adaptive batch sizing
Chen Chen, Qizhen Weng, Wei Wang, Baochun Li, and Bo Li
In 9th ACM Symposium on Cloud Computing (SoCC), 2018
Bib HTML PDF

@comment{
  chen2018Fast: the page field names a single page, so it is written as one
  number instead of a range whose two endpoints are identical.
}
@inproceedings{chen2018Fast,
  title     = {Fast distributed deep learning via worker-adaptive batch sizing},
  author    = {Chen, Chen and Weng, Qizhen and Wang, Wei and Li, Baochun and Li, Bo},
  booktitle = {9th ACM Symposium on Cloud Computing (SoCC)},
  pages     = {521},
  year      = {2018},
}

ICDCS
Opus: Fair and efficient cache sharing for in-memory data analytics
Yinghao Yu, Wei Wang, Jun Zhang, Qizhen Weng, and Khaled Ben Letaief
In 38th IEEE International Conference on Distributed Computing Systems (ICDCS), 2018
Bib HTML PDF

@inproceedings{yu2018Opus,
  title     = {Opus: Fair and efficient cache sharing for in-memory data analytics},
  author    = {Yu, Yinghao and Wang, Wei and Zhang, Jun and Weng, Qizhen and Letaief, Khaled Ben},
  booktitle = {38th IEEE International Conference on Distributed Computing Systems (ICDCS)},
  pages     = {154--164},
  year      = {2018},
  publisher = {IEEE},
}