% Conference paper introducing the STRAIGHT processor architecture (ICNC, 2012).
% The citation key is kept verbatim so existing citations keep resolving
% (it looks like an OAI record identifier -- confirm against the source repository).
% The abstract is stored in `abstract`, not `note`, so bibliography styles do not
% typeset it; the source record repeated it three times with a piece of the
% paper's introduction spliced into the first sentence. One clean copy is kept
% here, and the spliced-in piece is preserved in the ignored `intro-excerpt` field.
@inproceedings{oai:uec.repo.nii.ac.jp:00001924,
  author    = {Irie, Hidetsugu and Fujiwara, Daisuke and Majima, Kazuki and Yoshinaga, Tsutomu},
  title     = {{STRAIGHT}: Realizing a Lightweight Large Instruction Window by using Eventually Consistent Distributed Registers},
  booktitle = {Third International Conference on Networking and Computing ({ICNC})},
  pages     = {336--342},
  month     = dec,
  year      = {2012},
  abstract  = {As the number of cores as well as the network size in a processor chip increases, the performance of each core is more critical for the improvement of the total chip performance. However, to improve the total chip performance, the performance per power or per unit area must be improved, making it difficult to adopt a conventional approach of superscalar extension. In this paper, we explore a new core structure that is suitable for manycore processors. We revisit prior studies of new instruction-level (ILP) and thread-level parallelism (TLP) architectures and propose our novel STRAIGHT processor architecture. By introducing the scheme of distributed key-value-store to the register file of clustered microarchitectures, STRAIGHT directly executes the operation with large logical registers, which are written only once. By discussing the processor structure, microarchitecture, and code model, we show that STRAIGHT realizes both large instruction window and lightweight rapid execution, while suppressing the hardware and energy cost. Preliminary estimation results are promising, and show that STRAIGHT improves the single thread performance by about 30\%, which is the geometric mean of the SPEC CPU 2006 benchmark suite, without significantly increasing the power and area budget.},
  intro-excerpt = {[...] programs. For scale-out applications, we assume the manycore processor structure, which consists of a number of STRAIGHT architecture cores (SAC) that are loosely connected each other. Being the first report on this novel processor architecture, in this paper, we discuss the concept behind STRAIGHT, propose basic principles, and estimate the performance and budget expectation. The rest of the paper consists of following sections. Section II revisits studies of new architectures that were designed to improve the ILP/TLP performance of superscalar processors, and discusses the dilemma of both scalability approach and quick worker approach. In section III, we discuss the key idea of STRAIGHT that allows the resolution of this dilemma by introducing a distributed key-value store to the processor architecture. Software and hardware outline models of STRAIGHT are described in section IV. Section V estimates the performance of STRAIGHT by using a cycle-accurate superscalar simulator and possible parameters, as well as hardware budgets. Finally, we summarize the paper in section VI.}
}