1:"$Sreact.fragment" 2:I[60675,["8173","static/chunks/8173-066406f34141ecb3.js","7177","static/chunks/app/layout-e5b6254cced12c56.js"],"Header"] 3:I[15244,[],""] 4:I[43866,[],""] 5:I[60766,["8173","static/chunks/8173-066406f34141ecb3.js","7177","static/chunks/app/layout-e5b6254cced12c56.js"],"GoogleAnalytics"] 6:I[6808,["7358","static/chunks/8b2dac85-3830e74a76c98279.js","2380","static/chunks/00a487b1-3ae8737144497144.js","5927","static/chunks/5927-d3ef888da69b1ef7.js","5630","static/chunks/5630-081b24e7cb02dfd5.js","1228","static/chunks/app/papers/page-9d2bbc282719d7ce.js"],"Bib"] 9:I[86213,[],"OutletBoundary"] b:I[86213,[],"MetadataBoundary"] d:I[86213,[],"ViewportBoundary"] f:I[34835,[],""] :HL["/_next/static/css/ff47c8294a96d26a.css","style"] 7:T8262,@inproceedings{KalerChWh24, author = {Tim Kaler and Xuhao Chen and Brian Wheatman and Dorothy Curtis and Bruce Hoppe and Tao B. Schardl and Charles E. Leiserson}, title = {Speedcode: Software Performance Engineering Education via the Coding of Didactic Exercises}, optbooktitle = {{IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2024 - Workshop, San Francisco, CA, USA, May 27-31, 2024}, booktitle = {EduPar}, pages = {391--394}, publisher = {{IEEE}}, year = {2024}, opturl = {https://doi.org/10.1109/IPDPSW63119.2024.00087}, doi = {10.1109/IPDPSW63119.2024.00087}, timestamp = {Fri, 09 Aug 2024 10:37:41 +0200}, biburl = {https://dblp.org/rec/conf/ipps/KalerCWCHSL24.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{SchardlLe23, author = {Tao B. Schardl and I-Ting Angelina Lee}, opteditor = {Maryam Mehri Dehnavi and Milind Kulkarni and Sriram Krishnamoorthy}, title = {OpenCilk: {A} Modular and Extensible Software Infrastructure for Fast Task-Parallel Code}, optbooktitle = {Proceedings of the 28th {ACM} {SIGPLAN} Annual Symposium on Principles and Practice of Parallel Programming, PPoPP 2023, Montreal, QC, Canada, 25 February 2023 - 1 March 2023}, booktitle = {PPoPP}, pages = {189--203}, optpublisher = {{ACM}}, year = {2023}, opturl = {https://doi.org/10.1145/3572848.3577509}, doi = {10.1145/3572848.3577509}, opttimestamp = {Wed, 22 Feb 2023 11:49:05 +0100}, optbiburl = {https://dblp.org/rec/conf/ppopp/SchardlL23.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{XuScPe23, author = {Helen Xu and Tao B. Schardl and Michael Pellauer and Joel S. Emer}, title = {Optimizing Compression Schemes for Parallel Sparse Tensor Algebra}, optbooktitle = {{IEEE} High Performance Extreme Computing Conference, {HPEC} 2023, Boston, MA, USA, September 25--29, 2023}, booktitle = {{HPEC}}, pages = {1--7}, optpublisher = {{IEEE}}, year = {2023}, opturl = {https://doi.org/10.1109/HPEC58863.2023.10363624}, doi = {10.1109/HPEC58863.2023.10363624}, opttimestamp = {Mon, 22 Jan 2024 20:34:11 +0100}, optbiburl = {https://dblp.org/rec/conf/hpec/XuSPE23.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{KalerIlMu23, author = {Tim Kaler and Alexandros{-}Stavros Iliopoulos and Philip Murzynowski and Tao B. Schardl and Charles E. Leiserson and Jie Chen}, opteditor = {Dawn Song and Michael Carbin and Tianqi Chen}, title = {Communication-Efficient Graph Neural Networks with Probabilistic Neighborhood Expansion Analysis and Caching}, optbooktitle = {Proceedings of the Sixth Conference on Machine Learning and Systems, MLSys 2023, Miami, FL, USA, June 4-8, 2023}, booktitle = {MLSys}, optpublisher = {mlsys.org}, year = {2023}, url = {https://proceedings.mlsys.org/paper_files/paper/2023}, opttimestamp = {Fri, 28 Jun 2024 15:58:54 +0200}, optbiburl = {https://dblp.org/rec/conf/mlsys/KalerIMSL023.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{CarratalaSaezGoIl22, author = {Roc\'io Carratal\'a-S\'aez and Arturo Gonz\'alez-Escribano and Alexandros-Stavros Iliopoulos and Charles E. Leiserson and Charlotte Park and Isabel Rosa and Tao B. Schardl and Yuri Torres and David P. Bunde}, title = {Peachy Parallel Assignments}, optbooktitle = {{IEEE/ACM} International Workshop on Education for High Performance Computing, EduHPC 2022, Dallas, TX, USA, November 13--18, 2022}, booktitle = {EduHPC}, pages = {50--56}, optpublisher = {{IEEE}}, year = {2022}, opturl = {https://doi.org/10.1109/EduHPC56719.2022.00012}, doi = {10.1109/EduHPC56719.2022.00012}, opttimestamp = {Fri, 10 Feb 2023 18:39:54 +0100}, optbiburl = {https://dblp.org/rec/conf/eduhpc/CarratalaSaezGILPRSTB22.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{KalerStOu22, author = {Tim Kaler and Nickolas Stathas and Anne Ouyang and Alexandros-Stavros Iliopoulos and Tao B. Schardl and Charles E. Leiserson and Jie Chen}, opteditor = {Diana Marculescu and Yuejie Chi and Carole{-}Jean Wu}, title = {Accelerating Training and Inference of Graph Neural Networks with Fast Sampling and Pipelining}, optbooktitle = {Proceedings of Machine Learning and Systems 2022, MLSys 2022, Santa Clara, CA, USA, August 29 - September 1, 2022}, booktitle = {MLSys}, optpublisher = {mlsys.org}, year = {2022}, url = {https://proceedings.mlsys.org/paper_files/paper/2022}, opttimestamp = {Tue, 24 May 2022 15:18:15 +0200}, optbiburl = {https://dblp.org/rec/conf/mlsys/KalerSOISL022.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{XuZhYi22, author = {Yifan Xu and Anchengcheng Zhou and Grace Q. Yin and Kunal Agrawal and I-Ting Angelina Lee and Tao B. Schardl}, opteditor = {Cynthia A. Phillips and Bettina Speckmann}, title = {Efficient Access History for Race Detection}, optbooktitle = {Proceedings of the Symposium on Algorithm Engineering and Experiments, {ALENEX} 2022, Alexandria, VA, USA, January 9-10, 2022}, booktitle = {{ALENEX}}, pages = {117--130}, optpublisher = {{SIAM}}, year = {2022}, opturl = {https://doi.org/10.1137/1.9781611977042.10}, doi = {10.1137/1.9781611977042.10}, opttimestamp = {Mon, 11 Apr 2022 13:26:42 +0200}, optbiburl = {https://dblp.org/rec/conf/alenex/0007ZYALS22.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @incollection{LeisersonSc22, author = {Charles E. Leiserson and Tao B. Schardl}, editor = {David A. Bader}, title = {A Work-Efficient Parallel Breadth-First Search Algorithm (or How To Cope With the Nondeterminism of Reducers)}, booktitle = {Massive Graph Analytics}, pages = {3--33}, optpublisher = {Chapman and Hall/CRC}, year = {2022}, opturl = {https://doi.org/10.1201/9781003033707-2}, doi = {10.1201/9781003033707-2}, opttimestamp = {Wed, 22 Nov 2023 12:12:45 +0100}, optbiburl = {https://dblp.org/rec/books/crc/22/LeisersonS22.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @incollection{HasenplaughKaSc22, author = {William Hasenplaugh and Tim Kaler and Tao B. Schardl and Charles E. Leiserson}, editor = {David A. Bader}, title = {Ordering Heuristics for Parallel Graph Coloring}, booktitle = {Massive Graph Analytics}, pages = {193--221}, optpublisher = {Chapman and Hall/CRC}, year = {2022}, opturl = {https://doi.org/10.1201/9781003033707-11}, doi = {10.1201/9781003033707-11}, opttimestamp = {Wed, 22 Nov 2023 12:12:45 +0100}, optbiburl = {https://dblp.org/rec/books/crc/22/HasenplaughKSL22.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @incollection{KalerHaSc22, author = {Tim Kaler and William Hasenplaugh and Tao B. Schardl and Charles E. Leiserson}, editor = {David A. Bader}, title = {Executing Dynamic Data-Graph Computations Deterministically Using Chromatic Scheduling}, booktitle = {Massive Graph Analytics}, pages = {397--429}, optpublisher = {Chapman and Hall/CRC}, year = {2022}, opturl = {https://doi.org/10.1201/9781003033707-18}, doi = {10.1201/9781003033707-18}, opttimestamp = {Wed, 22 Nov 2023 12:12:45 +0100}, optbiburl = {https://dblp.org/rec/books/crc/22/KalerHSL22.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{HandlemanRaLe21, author = {Aaron Handleman and Arthur G. Rattew and I-Ting Angelina Lee and Tao B. Schardl}, title = {A Hybrid Scheduling Scheme for Parallel Loops}, optbooktitle = {35th {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2021, Portland, OR, USA, May 17-21, 2021}, booktitle = {{IPDPS}}, pages = {587--598}, optpublisher = {{IEEE}}, year = {2021}, opturl = {https://doi.org/10.1109/IPDPS49936.2021.00067}, doi = {10.1109/IPDPS49936.2021.00067}, opttimestamp = {Fri, 02 Jul 2021 14:10:40 +0200}, optbiburl = {https://dblp.org/rec/conf/ipps/HandlemanRLS21.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{KalerScXi21, author = {Tim Kaler and Tao B. Schardl and Brian Xie and Charles E. Leiserson and Jie Chen and Aldo Pareja and Georgios Kollias}, opteditor = {Michael Schapira}, title = {{PARAD}: {A} Work-Efficient Parallel Algorithm for Reverse-Mode Automatic Differentiation}, booktitle = {{APOCS}}, pages = {144--158}, optpublisher = {{SIAM}}, year = {2021}, opturl = {https://doi.org/10.1137/1.9781611976489.11}, doi = {10.1137/1.9781611976489.11}, opttimestamp = {Mon, 31 May 2021 15:36:24 +0200}, optbiburl = {https://dblp.org/rec/conf/apocs/KalerSXLCPK21.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @article{LeisersonThEm20, author = {Leiserson, Charles E. and Thompson, Neil C. and Emer, Joel S. and Kuszmaul, Bradley C. and Lampson, Butler W. and Sanchez, Daniel and Schardl, Tao B.}, title = {There{\textquoteright}s plenty of room at the Top: What will drive computer performance after Moore{\textquoteright}s law?}, volume = {368}, number = {6495}, elocation-id = {eaam9744}, year = {2020}, doi = {10.1126/science.aam9744}, publisher = {American Association for the Advancement of Science}, issn = {0036-8075}, opturl = {https://science.sciencemag.org/content/368/6495/eaam9744}, opteprint = {https://science.sciencemag.org/content/368/6495/eaam9744.full.pdf}, journal = {Science} } @inproceedings{ParejaDoCh20, author = {Aldo Pareja and Giacomo Domeniconi and Jie Chen and Tengfei Ma and Toyotaro Suzumura and Hiroki Kanezashi and Tim Kaler and Tao B. Schardl and Charles E. Leiserson}, title = {EvolveGCN: Evolving Graph Convolutional Networks for Dynamic Graphs}, optbooktitle = {The Thirty-Fourth {AAAI} Conference on Artificial Intelligence, {AAAI} 2020, The Thirty-Second Innovative Applications of Artificial Intelligence Conference, {IAAI} 2020, The Tenth {AAAI} Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2020, New York, NY, USA, February 7-12, 2020}, booktitle = {AAAI}, pages = {5363--5370}, optpublisher = {{AAAI} Press}, year = {2020}, doi = {10.1609/aaai.v34i04.5984}, opturl = {https://aaai.org/ojs/index.php/AAAI/article/view/5984}, opttimestamp = {Thu, 04 Jun 2020 16:49:55 +0200}, optbiburl = {https://dblp.org/rec/conf/aaai/ParejaDCMSKKSL20.bib}, optbibsource = {dblp computer science bibliography, https://dblp.org} } @inbook{KalerKuSc20, author = {Tim Kaler and William Kuszmaul and Tao B. Schardl and Daniele Vettorel}, title = {Cilkmem: Algorithms for Analyzing the Memory High-Water Mark of Fork-Join Parallel Programs}, optbooktitle = {Symposium on Algorithmic Principles of Computer Systems}, booktitle = {APoCS}, chapter = {}, year = 2020, pages = {162--176}, doi = {10.1137/1.9781611976021.12}, opturl = {https://epubs.siam.org/doi/abs/10.1137/1.9781611976021.12}, opteprint = {https://epubs.siam.org/doi/pdf/10.1137/1.9781611976021.12}, annotation = {Best paper finalist} } @article{SchardlMoLe19, author = {Schardl, Tao B. and Moses, William S. and Leiserson, Charles E.}, title = {Tapir: Embedding Recursive Fork-Join Parallelism into {LLVM}’s Intermediate Representation}, year = {2019}, issue_date = {December 2019}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = {6}, number = {4}, optissn = {2329-4949}, opturl = {https://doi.org/10.1145/3365655}, doi = {10.1145/3365655}, journal = {ACM Transactions on Parallel Computing}, month = dec, articleno = {Article 19}, numpages = {33}, keywords = {optimization, parallel computing, LLVM, OpenMP, fork-join parallelism, multicore, compiling, Cilk, control-flow graph, Tapir, serial-projection property} } @inproceedings{SchardlSa19, author = {Tao B. Schardl and Siddharth Samsi}, optbooktitle = {2019 IEEE High Performance Extreme Computing Conference (HPEC)}, booktitle = {HPEC}, title = {TapirXLA: Embedding Fork-Join Parallelism into the {XLA} Compiler in {TensorFlow} Using Tapir}, year = {2019}, volume = {}, number = {}, pages = {1--8}, keywords = {learning (artificial intelligence);linear algebra;mathematics computing;parallel processing;program compilers;software libraries;TapirXLA;recursive fork-join parallelism;machine-learning applications;compiler technology;machine-learning frameworks;compiler intermediate representation;mainstream compiler IR;machine learning;low-level parallel computation;high-level TensorFlow operations;compiler implementations;parallel running time;XLA compiler;linear-algebra library routines;Hardware;Optimization;Parallel processing;Training;Program processors;Computer architecture;Task analysis}, doi = {10.1109/HPEC.2019.8916312}, optissn = {2377-6943}, month = sep } @article{LeeSc18, author = {Lee, I-Ting Angelina and Schardl, Tao B.}, title = {Efficient Race Detection for Reducer Hyperobjects}, year = {2018}, issue_date = {September 2018}, optpublisher = {Association for Computing Machinery}, optaddress = {New York, NY, USA}, volume = {4}, number = {4}, issn = {2329-4949}, opturl = {https://doi.org/10.1145/3205914}, doi = {10.1145/3205914}, optabstract = {A multithreaded Cilk program that is ostensibly deterministic may nevertheless behave nondeterministically due to programming errors in the code. For a Cilk program that uses reducers—a general reduction mechanism supported in various Cilk dialects—such programming errors are especially challenging to debug, because the errors can expose the nondeterminism in how the Cilk runtime system manages reducers.We identify two unique types of races that arise from incorrect use of reducers in a Cilk program, and we present two algorithms to catch these races. The first algorithm, called the Peer-Set algorithm, detects view-read races, which occur when the program attempts to retrieve a value out of a reducer when the read may result in a nondeterministic value, such as before all previously spawned subcomputations that might update the reducer have necessarily returned. The second algorithm, called the SP+ algorithm, detects determinacy races—instances where a write to a memory location occurs logically in parallel with another access to that location—even when the raced-on memory locations relate to reducers. Both algorithms are provably correct, asymptotically efficient, and can be implemented efficiently in practice. We have implemented both algorithms in our prototype race detector, Rader. When running Peer-Set, Rader incurs a geometric-mean multiplicative overhead of 2.56 over running the benchmark without instrumentation. When running SP+, Rader incurs a geometric-mean multiplicative overhead of 16.94.}, journal = {ACM Transactions on Parallel Computing}, month = aug, articleno = {20}, numpages = {40}, optkeywords = {view-read race, reducers, Cilk, nondeterminism, determinacy race} } @inproceedings{SchardlLeLe18, author = {Schardl, Tao B. and Lee, I-Ting Angelina and Leiserson, Charles E.}, title = {Brief Announcement: Open Cilk}, optbooktitle = {Proceedings of the 30th on Symposium on Parallelism in Algorithms and Architectures}, optseries = {SPAA '18}, booktitle = {SPAA}, year = {2018}, optisbn = {978-1-4503-5799-9}, optlocation = {Vienna, Austria}, pages = {351--353}, numpages = {3}, opturl = {http://doi.acm.org/10.1145/3210377.3210658}, doi = {10.1145/3210377.3210658}, optacmid = {3210658}, optpublisher = {ACM}, optaddress = {New York, NY, USA}, keywords = {cilk, multi-core programming, parallel algorithms, parallel languages, productivity tools} } @inproceedings{SchardlDeDo18, author = {Schardl, Tao B. and Denniston, Tyler and Doucet, Damon and Kuszmaul, Bradley C. and Lee, I-Ting Angelina and Leiserson, Charles E.}, title = {The CSI Framework for Compiler-Inserted Program Instrumentation}, optbooktitle = {Abstracts of the 2018 ACM International Conference on Measurement and Modeling of Computer Systems}, optseries = {SIGMETRICS '18}, booktitle = {Abstracts of SIGMETRICS}, year = {2018}, optisbn = {978-1-4503-5846-0}, optlocation = {Irvine, CA, USA}, pages = {100--102}, numpages = {3}, opturl = {http://doi.acm.org/10.1145/3219617.3219657}, doi = {10.1145/3219617.3219657}, acmid = {3219657}, optpublisher = {ACM}, optaddress = {New York, NY, USA}, keywords = {compiler-inserted instrumentation, dynamic program analysis, program instrumentation} } @article{SchardlDeDo17, author = {Schardl, Tao B. and Denniston, Tyler and Doucet, Damon and Kuszmaul, Bradley C. and Lee, I-Ting Angelina and Leiserson, Charles E.}, title = {The CSI Framework for Compiler-Inserted Program Instrumentation}, optjournal = {Proc. ACM Meas. Anal. Comput. Syst.}, journal = {SIGMETRICS}, issue_date = {December 2017}, volume = {1}, number = {2}, month = dec, year = {2017}, optissn = {2476-1249}, pages = {43:1--43:25}, articleno = {43}, numpages = {25}, opturl = {http://doi.acm.org/10.1145/3154502}, doi = {10.1145/3154502}, acmid = {3154502}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {compiler-inserted instrumentation, dynamic program analysis, program instrumentation} } @inproceedings{SchardlMoLe17, author = {Schardl, Tao B. and Moses, William S. and Leiserson, Charles E.}, title = {Tapir: Embedding Fork-Join Parallelism into LLVM's Intermediate Representation}, optbooktitle = {Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming}, booktitle = {PPoPP}, optseries = {PPoPP '17}, year = {2017}, optisbn = {978-1-4503-4493-7}, optlocation = {Austin, Texas, USA}, pages = {249--265}, optnumpages = {17}, opturl = {http://doi.acm.org/10.1145/3018743.3018758}, doi = {10.1145/3018743.3018758}, optacmid = {3018758}, optpublisher = {ACM}, optaddress = {New York, NY, USA}, keywords = {cilk, compiling, control-flow graph, fork-join parallelism, llvm, multicore, openmp, optimization, par- allel computing, serial semantics, tapir}, annotation = {Won best paper award; invited to a special issue of \textit{ACM Transactions on Parallel Computing}} } @article{KalerHaSc16, author = {Kaler, Tim and Hasenplaugh, William and Schardl, Tao B. and Leiserson, Charles E.}, title = {Executing dynamic data-graph computations deterministically using chromatic scheduling}, journal = {ACM Transactions on Parallel Computing}, issue_date = {August 2016}, volume = {3}, number = {1}, month = jul, year = {2016}, opt_issn = {2329-4949}, pages = {2:1--2:31}, articleno = {2}, numpages = {31}, opt_url = {http://doi.acm.org/10.1145/2896850}, doi = {10.1145/2896850}, acmid = {2896850}, opt_publisher = {ACM}, opt_address = {New York, NY, USA}, keywords = {Data-graph computations, chromatic scheduling, determinism, multicore, multithreading, parallel programming, scheduling, work stealing}, opt_annotation = {Special issue for SPAA 2014} } @inproceedings{AbelDeDe16, author = {Zachary Abel and Erik D. Demaine and Martin L. Demaine and Sarah Eisenstat and Jayson Lynch and Tao B. Schardl}, title = {Who Needs Crossings? Hardness of Plane Graph Rigidity}, opt_booktitle = {Proceedings of the 32nd International Symposium on Computational Geometry (SoCG 2016)}, booktitle = {SoCG}, bookurl = {http://socg2016.cs.tufts.edu/}, opt_address = {Boston, Massachusetts}, opt_month = {June 14--18}, year = 2016, pages = {3:1--3:15}, opturl = {https://doi.org/10.4230/LIPIcs.SoCG.2016.3}, doi = {10.4230/LIPIcs.SoCG.2016.3} } @article{SuksompongLeSc16, title = {On the efficiency of localized work stealing}, journal = {Information Processing Letters}, issue_date = {February 2016}, volume = {116}, number = {2}, pages = {100--106}, month = feb, year = {2016}, opt_issn = {0020-0190}, doi = {10.1016/j.ipl.2015.10.002}, opt_url = {http://www.sciencedirect.com/science/article/pii/S0020019015001726}, author = {Warut Suksompong and Charles E. Leiserson and Tao B. Schardl}, opt_keywords = {Parallel algorithms}, opt_keywords = {Multithreaded computation}, opt_keywords = {Work stealing}, opt_keywords = {Localization } } @article{LeisersonScSu16, year = {2016}, opt_issn = {1432-4350}, journal = {Theory of Computing Systems}, issue_date = {February 2016}, volume = {58}, number = {2}, pages = {223--240}, month = feb, doi = {10.1007/s00224-015-9613-9}, title = {Upper bounds on number of steals in rooted trees}, opt_url = {http://dx.doi.org/10.1007/s00224-015-9613-9}, opt_publisher = {Springer US}, keywords = {Work stealing; Parallel algorithm; Extremal combinatorics; Binomial coefficient}, author = {Leiserson, Charles E. and Schardl, Tao B. and Suksompong, Warut}, opt_language = {English} } @article{LeeLeSc15, author = {Lee, I-Ting Angelina and Leiserson, Charles E. and Schardl, Tao B. and Zhang, Zhunping and Sukha, Jim}, title = {On-the-fly pipeline parallelism}, journal = {ACM Transactions on Parallel Computing}, issue_date = {October 2015}, volume = {2}, number = {3}, month = oct, year = {2015}, opt_issn = {2329-4949}, pages = {17:1--17:42}, opt_articleno = {17}, opt_numpages = {42}, opt_url = {http://doi.acm.org/10.1145/2809808}, doi = {10.1145/2809808}, opt_acmid = {2809808}, publisher = {ACM}, opt_address = {New York, NY, USA}, opt_keywords = {Cilk, multicore, multithreading, on-the-fly pipelining, parallel programming, pipeline parallelism, scheduling, work stealing}, opt_annotation = {Special issue for SPAA 2013} } @phdthesis{Schardl16, address = {Cambridge, MA}, author = {Tao B. Schardl}, month = sep, school = {Massachusetts Institute of Technology}, title = {Performance Engineering of Multicore Software: Developing a Science of Fast Code for the Post-Moore Era}, year = {2016}, doi = {1721.1/107290} } @inproceedings{SchardlKuLe15, author = {Schardl, Tao B. and Kuszmaul, Bradley C. and Lee, I-Ting Angelina and Leiserson, William M. and Leiserson, Charles E.}, title = {The {Cilkprof} scalability profiler}, booktitle = {SPAA}, opt_booktitle = {Proceedings of the 27th ACM on Symposium on Parallelism in Algorithms and Architectures}, opt_series = {SPAA '15}, year = {2015}, opt_isbn = {978-1-4503-3588-1}, opt_location = {Portland, Oregon, USA}, pages = {89--100}, opt_numpages = {12}, opt_url = {http://doi.acm.org/10.1145/2755573.2755603}, doi = {10.1145/2755573.2755603}, opt_acmid = {2755603}, opt_publisher = {ACM}, opt_address = {New York, NY, USA}, opt_keywords = {cilk, cilkprof, compiler instrumentation, llvm, multithreading, parallelism, performance, profiling, scalability, serial bottleneck, span, work} } @inproceedings{LeeSc15, author = {Lee, I-Ting Angelina and Schardl, Tao B.}, title = {Efficiently detecting races in Cilk programs that use reducer hyperobjects}, opt_booktitle = {Proceedings of the 27th ACM Symposium on Parallelism in Algorithms and Architectures}, opt_series = {SPAA '15}, booktitle = {SPAA}, year = {2015}, opt_isbn = {978-1-4503-3588-1}, opt_location = {Portland, Oregon, USA}, pages = {111--122}, opt_numpages = {12}, opt_url = {http://doi.acm.org/10.1145/2755573.2755599}, doi = {10.1145/2755573.2755599}, opt_acmid = {2755599}, opt_publisher = {ACM}, opt_address = {New York, NY, USA}, keywords = {cilk, determinacy race, nondeterminism, reducers, view-read race}, annotation = {Invited to a special issue of \textit{ACM Transactions on Parallel Computing}} } @inproceedings{HasenplaughKaSc14, author = {Hasenplaugh, William and Kaler, Tim and Schardl, Tao B. and Leiserson, Charles E.}, title = {Ordering heuristics for parallel graph coloring}, opt_booktitle = {Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures}, opt_series = {SPAA '14}, booktitle = {SPAA}, year = {2014}, opt_isbn = {978-1-4503-2821-0}, opt_location = {Prague, Czech Republic}, pages = {166--177}, opt_numpages = {12}, opt_url = {http://doi.acm.org/10.1145/2612669.2612697}, doi = {10.1145/2612669.2612697}, opt_acmid = {2612697}, opt_publisher = {ACM}, opt_address = {New York, NY, USA}, keywords = {cilk, graph coloring, ordering heuristics, parallel algorithms} } @inproceedings{KalerHaSc14, author = {Kaler, Tim and Hasenplaugh, William and Schardl, Tao B. and Leiserson, Charles E.}, title = {Executing dynamic data-graph computations deterministically using chromatic scheduling}, opt_booktitle = {Proceedings of the 26th ACM Symposium on Parallelism in Algorithms and Architectures}, opt_series = {SPAA '14}, booktitle = {SPAA}, year = {2014}, opt_isbn = {978-1-4503-2821-0}, opt_location = {Prague, Czech Republic}, pages = {154--165}, opt_numpages = {12}, opt_url = {http://doi.acm.org/10.1145/2612669.2612673}, doi = {10.1145/2612669.2612673}, opt_acmid = {2612673}, opt_publisher = {ACM}, opt_address = {New York, NY, USA}, keywords = {chromatic scheduling, data-graph computations, determinism, multicore, multithreading, parallel programming, reducers, work stealing}, annotation = {Invited to a special issue of \textit{ACM Transactions on Parallel Computing}} } @inproceedings{LeeLeSc13, author = {Lee, I-Ting Angelina and Leiserson, Charles E. and Schardl, Tao B. and Sukha, Jim and Zhang, Zhunping}, opt_booktitle = {Proceedings of the 25th Annual ACM Symposium on Parallelism in Algorithms and Architectures}, booktitle = {SPAA}, date-added = {2014-10-04 18:02:26 +0000}, date-modified = {2014-10-04 18:02:27 +0000}, opt_month = jul, pages = {140--151}, title = {On-the-fly pipeline parallelism}, year = {2013}, doi = {10.1145/2486159.2486174}, annotation = {Invited to a special issue of \textit{ACM Transactions on Parallel Computing}} } @article{AbelDeDe13, title = {Finding a Hamiltonian path in a cube with specified turns is hard}, author = {Zachary Abel and Erik D. Demaine and Martin L. Demaine and Sarah Eisenstat and Jayson Lynch and Tao B. Schardl}, journal = {Journal of Information Processing}, volume = {21}, number = {3}, pages = {368--377}, year = {2013}, doi = {10.2197/ipsjjip.21.368}, annotation = {Won outstanding paper award} } @article{AbelDeDe13a, author = {Abel, Zachary and Demaine, Erik D. and Demaine, Martin L. and Eisenstat, Sarah and Lynch, Jayson and Schardl, Tao B. and Shapiro-Ellowitz, Isaac}, title = {Folding equilateral plane graphs}, journal = {International Journal of Computational Geometry \& Applications}, volume = {23}, number = {02}, pages = {75--92}, year = {2013}, doi = {10.1142/S0218195913600017}, opt_url = {http://www.worldscientific.com/doi/abs/10.1142/S0218195913600017}, opt_eprint = {http://www.worldscientific.com/doi/pdf/10.1142/S0218195913600017} } @inproceedings{LeisersonScSu12, author = {Leiserson, Charles E. and Schardl, Tao B. and Sukha, Jim}, title = {Deterministic parallel random-number generation for dynamic-multithreading platforms}, booktitle = {PPoPP}, opt_booktitle = {Proceedings of the 17th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming}, opt_series = {PPoPP '12}, year = {2012}, opt_isbn = {978-1-4503-1160-1}, opt_location = {New Orleans, Louisiana, USA}, pages = {193--204}, opt_numpages = {12}, opt_url = {http://doi.acm.org/10.1145/2145816.2145841}, doi = {10.1145/2145816.2145841}, opt_acmid = {2145841}, opt_publisher = {ACM}, opt_address = {New York, NY, USA}, opt_keywords = {cilk, determinism, dynamic multithreading, nondeterminism, parallel computing, pedigree, random-number generator} } @inproceedings{AbelDeDe11, author = {Zachary Abel and Erik D. Demaine and Martin L. Demaine and Sarah Eisenstat and Jayson Lynch and Tao B. Schardl and Isaac Shapiro-Ellowitz}, title = {Folding equilateral plane graphs}, booktitle = {ISAAC}, year = {2011}, pages = {574--583}, doi = {10.1007/978-3-642-25591-5_59}, opt_ee = {http://dx.doi.org/10.1007/978-3-642-25591-5_59}, opt_bibsource = {DBLP, http://dblp.uni-trier.de} } @mastersthesis{Schardl10, address = {Cambridge, MA}, author = {Tao B. Schardl}, date-added = {2014-10-04 18:02:26 +0000}, date-modified = {2014-10-04 18:02:27 +0000}, month = may, school = {Massachusetts Institute of Technology}, title = {Design and analysis of a nondeterministic parallel breadth-first search algorithm}, annotation = {Awarded the Charles and Jennifer Johnson CS M.Eng. Prize}, year = {2010}, doi = {1721.1/61575} } @inproceedings{LeisersonSc10, author = {Charles E. Leiserson and Tao B. Schardl}, booktitle = {SPAA}, date-added = {2014-10-04 18:02:26 +0000}, date-modified = {2014-10-04 18:02:27 +0000}, opt_month = {June}, pages = {303--314}, opt_publisher = {ACM}, title = {A work-efficient parallel breadth-first search algorithm (or how to cope with the nondeterminism of reducers)}, year = {2010}, doi = {10.1145/1810479.1810534} } 0:{"P":null,"b":"Eh3qxYeU1yEQxh1X2uq5G","p":"","c":["","papers"],"i":false,"f":[[["",{"children":["papers",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",["$","$1","c",{"children":[null,["$","html",null,{"lang":"en","children":[["$","body",null,{"children":[["$","$L2",null,{}],["$","$L3",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L4",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[[],[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":404}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]]],"forbidden":"$undefined","unauthorized":"$undefined"}]]}],["$","$L5",null,{"gaId":"G-6TCR87FRLX"}]]}]]}],{"children":["papers",["$","$1","c",{"children":[null,["$","$L3",null,{"parallelRouterKey":"children","segmentPath":["children","papers","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L4",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","forbidden":"$undefined","unauthorized":"$undefined"}]]}],{"children":["__PAGE__",["$","$1","c",{"children":[["$","div",null,{"style":{"padding":"var(--size-gutter)"},"className":"dark:bg-zinc-900","children":[["$","main",null,{"className":"flex flex-col min-h-screen items-center","children":["$","article",null,{"className":"prose prose-zinc dark:prose-invert prose-code:before:hidden prose-code:after:hidden prose-inline-code:bg-amber-50 dark:prose-inline-code:bg-amber-950 ","children":[["$","h1",null,{"children":"Papers"}],["$","a",null,{"href":"#undefined","className":"group no-underline","children":["$","h2",null,{"id":"$undefined","children":[["$","span",null,{"className":"absolute -translate-x-[140%] opacity-0 group-hover:opacity-35","children":"#"}]," ","Featured papers"]}]}],[["$","$L6","featured-SchardlLe23",{"paper":{"author":[{"given":"Tao B.","family":"Schardl"},{"given":"I-Ting Angelina","family":"Lee"}],"container-title":"PPoPP","DOI":"10.1145/3572848.3577509","type":"paper-conference","id":"SchardlLe23","citation-key":"SchardlLe23","issued":{"date-parts":[[2023]]},"page":"189-203","title":"OpenCilk: A Modular and Extensible Software Infrastructure for Fast Task-Parallel Code","_graph":[{"type":"@biblatex/text","data":"$7"},{"type":"@biblatex/entries+list"},{"type":"@csl/list+object"}]},"idprefix":"featured-","annote":""}],["$","$L6","featured-LeisersonThEm20",{"paper":{"container-title":"Science","author":[{"given":"Charles E.","family":"Leiserson"},{"given":"Neil C.","family":"Thompson"},{"given":"Joel S.","family":"Emer"},{"given":"Bradley C.","family":"Kuszmaul"},{"given":"Butler W.","family":"Lampson"},{"given":"Daniel","family":"Sanchez"},{"given":"Tao B.","family":"Schardl"}],"DOI":"10.1126/science.aam9744","type":"article-journal","id":"LeisersonThEm20","citation-key":"LeisersonThEm20","ISSN":"0036-8075","issue":"6495","issued":{"date-parts":[[2020]]},"publisher":"American Association for the Advancement of Science","title":"There’s plenty of room at the Top: What will drive computer performance after Moore’s law?","volume":"368","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"idprefix":"featured-","annote":""}],["$","$L6","featured-SchardlMoLe17",{"paper":{"annote":"Won best paper award; invited to a special issue of ACM Transactions on Parallel Computing","author":[{"given":"Tao B.","family":"Schardl"},{"given":"William S.","family":"Moses"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"PPoPP","DOI":"10.1145/3018743.3018758","type":"paper-conference","id":"SchardlMoLe17","citation-key":"SchardlMoLe17","issued":{"date-parts":[[2017]]},"keyword":"cilk,compiling,control-flow graph,fork-join parallelism,llvm,multicore,openmp,optimization,par- allel computing,serial semantics,tapir","page":"249-265","title":"Tapir: Embedding Fork-Join Parallelism into LLVM's Intermediate Representation","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"idprefix":"featured-","annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Won best paper award; invited to a special issue of ACM Transactions on Parallel Computing."}}]}]],["$","a",null,{"href":"#undefined","className":"group no-underline","children":["$","h2",null,{"id":"$undefined","children":[["$","span",null,{"className":"absolute -translate-x-[140%] opacity-0 group-hover:opacity-35","children":"#"}]," ","All papers"]}]}],[["$","$L6","KalerChWh24",{"paper":{"author":[{"given":"Tim","family":"Kaler"},{"given":"Xuhao","family":"Chen"},{"given":"Brian","family":"Wheatman"},{"given":"Dorothy","family":"Curtis"},{"given":"Bruce","family":"Hoppe"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"EduPar","DOI":"10.1109/IPDPSW63119.2024.00087","type":"paper-conference","id":"KalerChWh24","citation-key":"KalerChWh24","issued":{"date-parts":[[2024]]},"page":"391-394","publisher":"IEEE","title":"Speedcode: Software Performance Engineering Education via the Coding of Didactic Exercises","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SchardlLe23",{"paper":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper","annote":""}],["$","$L6","XuScPe23",{"paper":{"author":[{"given":"Helen","family":"Xu"},{"given":"Tao B.","family":"Schardl"},{"given":"Michael","family":"Pellauer"},{"given":"Joel S.","family":"Emer"}],"container-title":"HPEC","DOI":"10.1109/HPEC58863.2023.10363624","type":"paper-conference","id":"XuScPe23","citation-key":"XuScPe23","issued":{"date-parts":[[2023]]},"page":"1-7","title":"Optimizing Compression Schemes for Parallel Sparse Tensor Algebra","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","KalerIlMu23",{"paper":{"author":[{"given":"Tim","family":"Kaler"},{"given":"Alexandros-Stavros","family":"Iliopoulos"},{"given":"Philip","family":"Murzynowski"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"},{"given":"Jie","family":"Chen"}],"container-title":"MLSys","type":"paper-conference","id":"KalerIlMu23","citation-key":"KalerIlMu23","issued":{"date-parts":[[2023]]},"title":"Communication-Efficient Graph Neural Networks with Probabilistic Neighborhood Expansion Analysis and Caching","URL":"https://proceedings.mlsys.org/paper_files/paper/2023","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","CarratalaSaezGoIl22",{"paper":{"author":[{"given":"Rocío","family":"Carratalá-Sáez"},{"given":"Arturo","family":"González-Escribano"},{"given":"Alexandros-Stavros","family":"Iliopoulos"},{"given":"Charles E.","family":"Leiserson"},{"given":"Charlotte","family":"Park"},{"given":"Isabel","family":"Rosa"},{"given":"Tao B.","family":"Schardl"},{"given":"Yuri","family":"Torres"},{"given":"David P.","family":"Bunde"}],"container-title":"EduHPC","DOI":"10.1109/EduHPC56719.2022.00012","type":"paper-conference","id":"CarratalaSaezGoIl22","citation-key":"CarratalaSaezGoIl22","issued":{"date-parts":[[2022]]},"page":"50-56","title":"Peachy Parallel Assignments","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","KalerStOu22",{"paper":{"author":[{"given":"Tim","family":"Kaler"},{"given":"Nickolas","family":"Stathas"},{"given":"Anne","family":"Ouyang"},{"given":"Alexandros-Stavros","family":"Iliopoulos"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"},{"given":"Jie","family":"Chen"}],"container-title":"MLSys","type":"paper-conference","id":"KalerStOu22","citation-key":"KalerStOu22","issued":{"date-parts":[[2022]]},"title":"Accelerating Training and Inference of Graph Neural Networks with Fast Sampling and Pipelining","URL":"https://proceedings.mlsys.org/paper_files/paper/2022","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","XuZhYi22",{"paper":{"author":[{"given":"Yifan","family":"Xu"},{"given":"Anchengcheng","family":"Zhou"},{"given":"Grace Q.","family":"Yin"},{"given":"Kunal","family":"Agrawal"},{"given":"I-Ting Angelina","family":"Lee"},{"given":"Tao B.","family":"Schardl"}],"container-title":"ALENEX","DOI":"10.1137/1.9781611977042.10","type":"paper-conference","id":"XuZhYi22","citation-key":"XuZhYi22","issued":{"date-parts":[[2022]]},"page":"117-130","title":"Efficient Access History for Race Detection","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","LeisersonSc22",{"paper":{"author":[{"given":"Charles E.","family":"Leiserson"},{"given":"Tao B.","family":"Schardl"}],"container-title":"Massive Graph Analytics","DOI":"10.1201/9781003033707-2","editor":[{"given":"David A.","family":"Bader"}],"type":"chapter","id":"LeisersonSc22","citation-key":"LeisersonSc22","issued":{"date-parts":[[2022]]},"page":"3-33","title":"A Work-Efficient Parallel Breadth-First Search Algorithm (or How To Cope With the Nondeterminism of Reducers)","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","HasenplaughKaSc22",{"paper":{"author":[{"given":"William","family":"Hasenplaugh"},{"given":"Tim","family":"Kaler"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"Massive Graph Analytics","DOI":"10.1201/9781003033707-11","editor":[{"given":"David A.","family":"Bader"}],"type":"chapter","id":"HasenplaughKaSc22","citation-key":"HasenplaughKaSc22","issued":{"date-parts":[[2022]]},"page":"193-221","title":"Ordering Heuristics for Parallel Graph Coloring","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","KalerHaSc22",{"paper":{"author":[{"given":"Tim","family":"Kaler"},{"given":"William","family":"Hasenplaugh"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"Massive Graph Analytics","DOI":"10.1201/9781003033707-18","editor":[{"given":"David A.","family":"Bader"}],"type":"chapter","id":"KalerHaSc22","citation-key":"KalerHaSc22","issued":{"date-parts":[[2022]]},"page":"397-429","title":"Executing Dynamic Data-Graph Computations Deterministically Using Chromatic Scheduling","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","HandlemanRaLe21",{"paper":{"author":[{"given":"Aaron","family":"Handleman"},{"given":"Arthur G.","family":"Rattew"},{"given":"I-Ting Angelina","family":"Lee"},{"given":"Tao B.","family":"Schardl"}],"container-title":"IPDPS","DOI":"10.1109/IPDPS49936.2021.00067","type":"paper-conference","id":"HandlemanRaLe21","citation-key":"HandlemanRaLe21","issued":{"date-parts":[[2021]]},"page":"587-598","title":"A Hybrid Scheduling Scheme for Parallel Loops","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","KalerScXi21",{"paper":{"author":[{"given":"Tim","family":"Kaler"},{"given":"Tao B.","family":"Schardl"},{"given":"Brian","family":"Xie"},{"given":"Charles E.","family":"Leiserson"},{"given":"Jie","family":"Chen"},{"given":"Aldo","family":"Pareja"},{"given":"Georgios","family":"Kollias"}],"container-title":"APOCS","DOI":"10.1137/1.9781611976489.11","type":"paper-conference","id":"KalerScXi21","citation-key":"KalerScXi21","issued":{"date-parts":[[2021]]},"page":"144-158","title":"PARAD: A Work-Efficient Parallel Algorithm for Reverse-Mode Automatic Differentiation","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","LeisersonThEm20",{"paper":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:1:props:paper","annote":""}],["$","$L6","ParejaDoCh20",{"paper":{"author":[{"given":"Aldo","family":"Pareja"},{"given":"Giacomo","family":"Domeniconi"},{"given":"Jie","family":"Chen"},{"given":"Tengfei","family":"Ma"},{"given":"Toyotaro","family":"Suzumura"},{"given":"Hiroki","family":"Kanezashi"},{"given":"Tim","family":"Kaler"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"AAAI","DOI":"10.1609/aaai.v34i04.5984","type":"paper-conference","id":"ParejaDoCh20","citation-key":"ParejaDoCh20","issued":{"date-parts":[[2020]]},"page":"5363-5370","title":"EvolveGCN: Evolving Graph Convolutional Networks for Dynamic Graphs","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","KalerKuSc20",{"paper":{"annote":"Best paper finalist","author":[{"given":"Tim","family":"Kaler"},{"given":"William","family":"Kuszmaul"},{"given":"Tao B.","family":"Schardl"},{"given":"Daniele","family":"Vettorel"}],"container-title":"APoCS","DOI":"10.1137/1.9781611976021.12","type":"chapter","id":"KalerKuSc20","citation-key":"KalerKuSc20","issued":{"date-parts":[[2020]]},"page":"162-176","title":"Cilkmem: Algorithms for Analyzing the Memory High-Water Mark of Fork-Join Parallel Programs","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Best paper finalist."}}]}],["$","$L6","SchardlSa19",{"paper":{"author":[{"given":"Tao B.","family":"Schardl"},{"given":"Siddharth","family":"Samsi"}],"container-title":"HPEC","DOI":"10.1109/HPEC.2019.8916312","type":"paper-conference","id":"SchardlSa19","citation-key":"SchardlSa19","issued":{"date-parts":[[2019,9]]},"keyword":"learning (artificial intelligence);linear algebra;mathematics computing;parallel processing;program compilers;software libraries;TapirXLA;recursive fork-join parallelism;machine-learning applications;compiler technology;machine-learning frameworks;compiler intermediate representation;mainstream compiler IR;machine learning;low-level parallel computation;high-level TensorFlow operations;compiler implementations;parallel running time;XLA compiler;linear-algebra library routines;Hardware;Optimization;Parallel processing;Training;Program processors;Computer architecture;Task analysis","page":"1-8","title":"TapirXLA: Embedding Fork-Join Parallelism into the XLA Compiler in TensorFlow Using Tapir","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SchardlMoLe19",{"paper":{"publisher-place":"New York, NY, USA","container-title":"ACM Transactions on Parallel Computing","number-of-pages":"33","author":[{"given":"Tao B.","family":"Schardl"},{"given":"William S.","family":"Moses"},{"given":"Charles E.","family":"Leiserson"}],"DOI":"10.1145/3365655","type":"article-journal","id":"SchardlMoLe19","citation-key":"SchardlMoLe19","issue":"4","issued":{"date-parts":[[2019,12]]},"keyword":"optimization,parallel computing,LLVM,OpenMP,fork-join parallelism,multicore,compiling,Cilk,control-flow graph,Tapir,serial-projection property","publisher":"Association for Computing Machinery","title":"Tapir: Embedding Recursive Fork-Join Parallelism into LLVM’s Intermediate Representation","volume":"6","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","LeeSc18",{"paper":{"container-title":"ACM Transactions on Parallel Computing","number-of-pages":"40","author":[{"given":"I-Ting Angelina","family":"Lee"},{"given":"Tao B.","family":"Schardl"}],"DOI":"10.1145/3205914","type":"article-journal","id":"LeeSc18","citation-key":"LeeSc18","ISSN":"2329-4949","issue":"4","issued":{"date-parts":[[2018,8]]},"title":"Efficient Race Detection for Reducer Hyperobjects","volume":"4","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SchardlLeLe18",{"paper":{"number-of-pages":"3","author":[{"given":"Tao B.","family":"Schardl"},{"given":"I-Ting Angelina","family":"Lee"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"SPAA","DOI":"10.1145/3210377.3210658","type":"paper-conference","id":"SchardlLeLe18","citation-key":"SchardlLeLe18","issued":{"date-parts":[[2018]]},"keyword":"cilk,multi-core programming,parallel algorithms,parallel languages,productivity tools","page":"351-353","title":"Brief Announcement: Open Cilk","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SchardlDeDo18",{"paper":{"number-of-pages":"3","author":[{"given":"Tao B.","family":"Schardl"},{"given":"Tyler","family":"Denniston"},{"given":"Damon","family":"Doucet"},{"given":"Bradley C.","family":"Kuszmaul"},{"given":"I-Ting Angelina","family":"Lee"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"Abstracts of SIGMETRICS","DOI":"10.1145/3219617.3219657","type":"paper-conference","id":"SchardlDeDo18","citation-key":"SchardlDeDo18","issued":{"date-parts":[[2018]]},"keyword":"compiler-inserted instrumentation,dynamic program analysis,program instrumentation","page":"100-102","title":"The CSI Framework for Compiler-Inserted Program Instrumentation","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SchardlDeDo17",{"paper":{"publisher-place":"New York, NY, USA","container-title":"SIGMETRICS","number-of-pages":"25","author":[{"given":"Tao B.","family":"Schardl"},{"given":"Tyler","family":"Denniston"},{"given":"Damon","family":"Doucet"},{"given":"Bradley C.","family":"Kuszmaul"},{"given":"I-Ting Angelina","family":"Lee"},{"given":"Charles E.","family":"Leiserson"}],"DOI":"10.1145/3154502","type":"article-journal","id":"SchardlDeDo17","citation-key":"SchardlDeDo17","issue":"2","issued":{"date-parts":[[2017,12]]},"keyword":"compiler-inserted instrumentation,dynamic program analysis,program instrumentation","page":"43:1-43:25","publisher":"ACM","title":"The CSI Framework for Compiler-Inserted Program Instrumentation","volume":"1","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SchardlMoLe17",{"paper":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:2:props:paper","annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Won best paper award; invited to a special issue of ACM Transactions on Parallel Computing."}}]}],["$","$L6","LeisersonScSu16",{"paper":{"container-title":"Theory of Computing Systems","author":[{"given":"Charles E.","family":"Leiserson"},{"given":"Tao B.","family":"Schardl"},{"given":"Warut","family":"Suksompong"}],"DOI":"10.1007/s00224-015-9613-9","type":"article-journal","id":"LeisersonScSu16","citation-key":"LeisersonScSu16","issue":"2","issued":{"date-parts":[[2016,2]]},"keyword":"Work stealing; Parallel algorithm; Extremal combinatorics; Binomial coefficient","page":"223-240","title":"Upper bounds on number of steals in rooted trees","volume":"58","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","KalerHaSc16",{"paper":{"container-title":"ACM Transactions on Parallel Computing","number-of-pages":"31","author":[{"given":"Tim","family":"Kaler"},{"given":"William","family":"Hasenplaugh"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"}],"DOI":"10.1145/2896850","type":"article-journal","id":"KalerHaSc16","citation-key":"KalerHaSc16","issue":"1","issued":{"date-parts":[[2016,7]]},"keyword":"Data-graph computations,chromatic scheduling,determinism,multicore,multithreading,parallel programming,scheduling,work stealing","page":"2:1-2:31","title":"Executing dynamic data-graph computations deterministically using chromatic scheduling","volume":"3","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","AbelDeDe16",{"paper":{"author":[{"given":"Zachary","family":"Abel"},{"given":"Erik D.","family":"Demaine"},{"given":"Martin L.","family":"Demaine"},{"given":"Sarah","family":"Eisenstat"},{"given":"Jayson","family":"Lynch"},{"given":"Tao B.","family":"Schardl"}],"container-title":"SoCG","DOI":"10.4230/LIPIcs.SoCG.2016.3","type":"paper-conference","id":"AbelDeDe16","citation-key":"AbelDeDe16","issued":{"date-parts":[[2016]]},"page":"3:1-3:15","title":"Who Needs Crossings? Hardness of Plane Graph Rigidity","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SuksompongLeSc16",{"paper":{"container-title":"Information Processing Letters","author":[{"given":"Warut","family":"Suksompong"},{"given":"Charles E.","family":"Leiserson"},{"given":"Tao B.","family":"Schardl"}],"DOI":"10.1016/j.ipl.2015.10.002","type":"article-journal","id":"SuksompongLeSc16","citation-key":"SuksompongLeSc16","issue":"2","issued":{"date-parts":[[2016,2]]},"page":"100-106","title":"On the efficiency of localized work stealing","volume":"116","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","Schardl16",{"paper":{"publisher-place":"Cambridge, MA","publisher":"Massachusetts Institute of Technology","author":[{"given":"Tao B.","family":"Schardl"}],"DOI":"1721.1/107290","type":"thesis","genre":"phdthesis","id":"Schardl16","citation-key":"Schardl16","issued":{"date-parts":[[2016,9]]},"title":"Performance Engineering of Multicore Software: Developing a Science of Fast Code for the Post-Moore Era","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","LeeLeSc15",{"paper":{"container-title":"ACM Transactions on Parallel Computing","author":[{"given":"I-Ting Angelina","family":"Lee"},{"given":"Charles E.","family":"Leiserson"},{"given":"Tao B.","family":"Schardl"},{"given":"Zhunping","family":"Zhang"},{"given":"Jim","family":"Sukha"}],"DOI":"10.1145/2809808","type":"article-journal","id":"LeeLeSc15","citation-key":"LeeLeSc15","issue":"3","issued":{"date-parts":[[2015,10]]},"page":"17:1-17:42","publisher":"ACM","title":"On-the-fly pipeline parallelism","volume":"2","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","SchardlKuLe15",{"paper":{"author":[{"given":"Tao B.","family":"Schardl"},{"given":"Bradley C.","family":"Kuszmaul"},{"given":"I-Ting Angelina","family":"Lee"},{"given":"William M.","family":"Leiserson"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"SPAA","DOI":"10.1145/2755573.2755603","type":"paper-conference","id":"SchardlKuLe15","citation-key":"SchardlKuLe15","issued":{"date-parts":[[2015]]},"page":"89-100","title":"The Cilkprof scalability profiler","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","LeeSc15",{"paper":{"annote":"Invited to a special issue of ACM Transactions on Parallel Computing","author":[{"given":"I-Ting Angelina","family":"Lee"},{"given":"Tao B.","family":"Schardl"}],"container-title":"SPAA","DOI":"10.1145/2755573.2755599","type":"paper-conference","id":"LeeSc15","citation-key":"LeeSc15","issued":{"date-parts":[[2015]]},"keyword":"cilk,determinacy race,nondeterminism,reducers,view-read race","page":"111-122","title":"Efficiently detecting races in Cilk programs that use reducer hyperobjects","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Invited to a special issue of ACM Transactions on Parallel Computing."}}]}],["$","$L6","HasenplaughKaSc14",{"paper":{"author":[{"given":"William","family":"Hasenplaugh"},{"given":"Tim","family":"Kaler"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"SPAA","DOI":"10.1145/2612669.2612697","type":"paper-conference","id":"HasenplaughKaSc14","citation-key":"HasenplaughKaSc14","issued":{"date-parts":[[2014]]},"keyword":"cilk,graph coloring,ordering heuristics,parallel algorithms","page":"166-177","title":"Ordering heuristics for parallel graph coloring","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","KalerHaSc14",{"paper":{"annote":"Invited to a special issue of ACM Transactions on Parallel Computing","author":[{"given":"Tim","family":"Kaler"},{"given":"William","family":"Hasenplaugh"},{"given":"Tao B.","family":"Schardl"},{"given":"Charles E.","family":"Leiserson"}],"container-title":"SPAA","DOI":"10.1145/2612669.2612673","type":"paper-conference","id":"KalerHaSc14","citation-key":"KalerHaSc14","issued":{"date-parts":[[2014]]},"keyword":"chromatic scheduling,data-graph computations,determinism,multicore,multithreading,parallel programming,reducers,work stealing","page":"154-165","title":"Executing dynamic data-graph computations deterministically using chromatic scheduling","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Invited to a special issue of ACM Transactions on Parallel Computing."}}]}],["$","$L6","LeeLeSc13",{"paper":{"annote":"Invited to a special issue of ACM Transactions on Parallel Computing","author":[{"given":"I-Ting Angelina","family":"Lee"},{"given":"Charles E.","family":"Leiserson"},{"given":"Tao B.","family":"Schardl"},{"given":"Jim","family":"Sukha"},{"given":"Zhunping","family":"Zhang"}],"container-title":"SPAA","DOI":"10.1145/2486159.2486174","type":"paper-conference","id":"LeeLeSc13","citation-key":"LeeLeSc13","issued":{"date-parts":[[2013]]},"page":"140-151","title":"On-the-fly pipeline parallelism","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Invited to a special issue of ACM Transactions on Parallel Computing."}}]}],["$","$L6","AbelDeDe13",{"paper":{"container-title":"Journal of Information Processing","annote":"Won outstanding paper award","author":[{"given":"Zachary","family":"Abel"},{"given":"Erik D.","family":"Demaine"},{"given":"Martin L.","family":"Demaine"},{"given":"Sarah","family":"Eisenstat"},{"given":"Jayson","family":"Lynch"},{"given":"Tao B.","family":"Schardl"}],"DOI":"10.2197/ipsjjip.21.368","type":"article-journal","id":"AbelDeDe13","citation-key":"AbelDeDe13","issue":"3","issued":{"date-parts":[[2013]]},"page":"368-377","title":"Finding a Hamiltonian path in a cube with specified turns is hard","volume":"21","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Won outstanding paper award."}}]}],["$","$L6","AbelDeDe13a",{"paper":{"container-title":"International Journal of Computational Geometry & Applications","author":[{"given":"Zachary","family":"Abel"},{"given":"Erik D.","family":"Demaine"},{"given":"Martin L.","family":"Demaine"},{"given":"Sarah","family":"Eisenstat"},{"given":"Jayson","family":"Lynch"},{"given":"Tao B.","family":"Schardl"},{"given":"Isaac","family":"Shapiro-Ellowitz"}],"DOI":"10.1142/S0218195913600017","type":"article-journal","id":"AbelDeDe13a","citation-key":"AbelDeDe13a","issue":"02","issued":{"date-parts":[[2013]]},"page":"75-92","title":"Folding equilateral plane graphs","volume":"23","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","LeisersonScSu12",{"paper":{"author":[{"given":"Charles E.","family":"Leiserson"},{"given":"Tao B.","family":"Schardl"},{"given":"Jim","family":"Sukha"}],"container-title":"PPoPP","DOI":"10.1145/2145816.2145841","type":"paper-conference","id":"LeisersonScSu12","citation-key":"LeisersonScSu12","issued":{"date-parts":[[2012]]},"page":"193-204","title":"Deterministic parallel random-number generation for dynamic-multithreading platforms","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","AbelDeDe11",{"paper":{"author":[{"given":"Zachary","family":"Abel"},{"given":"Erik D.","family":"Demaine"},{"given":"Martin L.","family":"Demaine"},{"given":"Sarah","family":"Eisenstat"},{"given":"Jayson","family":"Lynch"},{"given":"Tao B.","family":"Schardl"},{"given":"Isaac","family":"Shapiro-Ellowitz"}],"container-title":"ISAAC","DOI":"10.1007/978-3-642-25591-5_59","type":"paper-conference","id":"AbelDeDe11","citation-key":"AbelDeDe11","issued":{"date-parts":[[2011]]},"page":"574-583","title":"Folding equilateral plane graphs","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}],["$","$L6","Schardl10",{"paper":{"publisher-place":"Cambridge, MA","publisher":"Massachusetts Institute of Technology","annote":"Awarded the Charles and Jennifer Johnson CS M.Eng. Prize","author":[{"given":"Tao B.","family":"Schardl"}],"DOI":"1721.1/61575","type":"thesis","genre":"mathesis","id":"Schardl10","citation-key":"Schardl10","issued":{"date-parts":[[2010,5]]},"title":"Design and analysis of a nondeterministic parallel breadth-first search algorithm","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":["$","span",null,{"className":"block font-semibold text-rose-700 dark:text-rose-400","dangerouslySetInnerHTML":{"__html":" Awarded the Charles and Jennifer Johnson CS M.Eng. Prize."}}]}],["$","$L6","LeisersonSc10",{"paper":{"author":[{"given":"Charles E.","family":"Leiserson"},{"given":"Tao B.","family":"Schardl"}],"container-title":"SPAA","DOI":"10.1145/1810479.1810534","type":"paper-conference","id":"LeisersonSc10","citation-key":"LeisersonSc10","issued":{"date-parts":[[2010]]},"page":"303-314","title":"A work-efficient parallel breadth-first search algorithm (or how to cope with the nondeterminism of reducers)","_graph":"$0:f:0:1:2:children:2:children:1:props:children:0:props:children:0:props:children:props:children:2:0:props:paper:_graph"},"annote":""}]]]}]}],["$","footer",null,{"style":{"marginTop":"var(--space-5)","fontSize":"var(--font-sm)"},"className":"text-zinc-500","children":["© ",2025," Tao B. Schardl"," ","· Last modified ","$@8"," ","· ",["$","a",null,{"href":"http://accessibility.mit.edu/","children":"Accessibility"}]]}]]}],[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/ff47c8294a96d26a.css","precedence":"next","crossOrigin":"$undefined","nonce":"$undefined"}]],["$","$L9",null,{"children":"$La"}]]}],{},null,false]},null,false]},null,false],["$","$1","h",{"children":[null,["$","$1","z7v5qGjwNA2yC1SOc2Bv2",{"children":[["$","$Lb",null,{"children":"$Lc"}],["$","$Ld",null,{"children":"$Le"}],null]}]]}],false]],"m":"$undefined","G":["$f","$undefined"],"s":false,"S":true} e:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}]] c:[["$","meta","0",{"charSet":"utf-8"}],["$","title","1",{"children":"Papers | Tao B. Schardl"}],["$","link","2",{"rel":"author","href":"https://neboat.mit.edu"}],["$","meta","3",{"name":"author","content":"Tao B. Schardl"}],["$","meta","4",{"name":"generator","content":"Next.js"}],["$","meta","5",{"name":"referrer","content":"origin-when-cross-origin"}],["$","link","6",{"rel":"canonical","href":"https://neboat.mit.edu/papers"}],["$","meta","7",{"name":"format-detection","content":"telephone=no, address=no, email=no"}],["$","meta","8",{"property":"og:title","content":"Papers | Tao B. Schardl"}],["$","meta","9",{"property":"og:type","content":"website"}],["$","meta","10",{"name":"twitter:card","content":"summary"}],["$","meta","11",{"name":"twitter:title","content":"Papers | Tao B. Schardl"}],["$","link","12",{"rel":"icon","href":"/icon.svg?76306e7d0afecba5","type":"image/svg+xml","sizes":"any"}]] a:null 8:"01/11/2025"