2. 整列
著者:梅谷 武
各言語において整列の基本動作を比較する。
作成:2024-03-29
更新:2024-09-08
更新:2024-09-08
n個のリストを整列する試験を行なうためにn個の乱数を生成してファイルに保存するプログラムを作成する。
乱数生成はC++で行なう。乱数データは一様分布と正規分布の二種類を用意する。
random.cpp:
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <numeric>
#include <print>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#ifdef NORMAL
int dist_type = 2;
#else
int dist_type = 1;
#endif
bool random_out(const std::string &filename, int n, int type) {
std::vector<int> nums;
std::random_device rd;
std::mt19937 gen(rd());
switch (type) {
case 1: {
std::uniform_int_distribution<int> dis(0, n * 100);
for (int i = 0; i < n; ++i)
nums.push_back(dis(gen));
break;
}
case 2: {
double mean = n * 50.0;
double stddev = n * 10.0;
std::normal_distribution<> dis(mean, stddev);
for (int i = 0; i < n; ++i) {
double raw = dis(gen);
int val = std::clamp(static_cast<int>(raw), 0, n * 100);
nums.push_back(val);
}
break;
}
default:
throw std::runtime_error("Invalid distribution type selected.");
return false;
}
// statistics
double mean = std::accumulate(nums.begin(), nums.end(), 0l);
mean /= static_cast<double>(n);
double sum = 0.0;
for (int val : nums) {
sum += (static_cast<double>(val) - mean) * (static_cast<double>(val) - mean);
}
double variance = sum / n;
double sd = sqrt(variance);
int min = *std::min_element(nums.begin(), nums.end());
int max = *std::max_element(nums.begin(), nums.end());
std::println("min, max = {:>11}, {:>11}", min, max);
std::println("mean, sd = {:11.1f}, {:11.1f}", mean, sd);
// write file
std::ofstream outfile(filename);
if (!outfile) {
throw std::runtime_error("Could not open file for writing.");
return false;
}
std::ostringstream buf;
for (int num : nums)
buf << num << "\n";
outfile << buf.str();
outfile.close();
return true;
}
int main(int argc, char *argv[]) {
try {
if (argc != 3) {
#ifdef NORMAL
throw std::invalid_argument(
"Usage: normal <filename> <datasize>");
#else
throw std::invalid_argument(
"Usage: uniform <filename> <datasize>");
#endif
}
std::string filename(argv[1]);
int n = std::stol(argv[2]);
if (n <= 0) {
throw std::invalid_argument(
"Datasize must be a positive integer.");
}
if (random_out(filename, n, dist_type))
std::println("{} data file created: {}", n, filename);
else
std::println("An error has occurred.");
} catch (const std::logic_error &e) {
std::println(stderr, "Logic error: {}", e.what());
} catch (const std::runtime_error &e) {
std::println(stderr, "Runtime error: {}", e.what());
} catch (const std::exception &e) {
std::println(stderr, "Another std::exception: {}", e.what());
} catch (...) {
std::println(stderr, "Unknown exception.");
}
return 0;
}
makefile:
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = random.cpp
EXES = uniform normal
all: $(EXES)
uniform: $(SRC)
$(CPPG) $(CPPFLAGS) -o $@ $(SRC)
normal: $(SRC)
$(CPPG) $(CPPFLAGS) -DNORMAL -o $@ $(SRC)
clean:
rm -f $(EXES)
1000000個の基準データ:uniform1m.txt, normal1m.txtを作成し、それを固定して各言語を比較する。
$ uniform uniform1m.txt 1000000
min, max = 280, 99999906
mean, sd = 50012261.7, 28868234.9
1000000 data file created: uniform1m.txt
$ normal normal1m.txt 1000000
min, max = 2853339, 98883771
mean, sd = 49982596.5, 9992196.7
1000000 data file created: normal1m.txt
指定した入力ファイルから整数列を読み込んでリストに格納し、それを昇順整列し、指定した出力ファイルに結果を書き込む。
std::sortで整列する。
sort.cpp
#include <chrono>
#include <deque>
#include <fstream>
#include <iostream>
#include <print>
#include <sstream>
#include <stdexcept>
bool test(const std::string &infile, const std::string &outfile) {
std::deque<long> lst;
// read file
auto start = std::chrono::high_resolution_clock::now();
std::ifstream fin(infile);
if (!fin) {
throw std::runtime_error("Could not open file for reading.");
return false;
}
int num;
while (fin >> num)
lst.push_back(num);
fin.close();
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration = end - start;
std::println("length = {}", lst.size());
std::println("read time: {:9.6f} [sec]", duration.count());
// sort
start = std::chrono::high_resolution_clock::now();
std::sort(lst.begin(), lst.end());
end = std::chrono::high_resolution_clock::now();
duration = end - start;
std::println("sort time: {:9.6f} [sec]", duration.count());
// write file
start = std::chrono::high_resolution_clock::now();
std::ofstream fout(outfile);
if (!fout) {
throw std::runtime_error("Could not open file for writing.");
return false;
}
std::ostringstream buf;
for (long num : lst)
buf << num << "\n";
fout << buf.str();
fout.close();
end = std::chrono::high_resolution_clock::now();
duration = end - start;
std::println("write time: {:9.6f} [sec]", duration.count());
return true;
}
int main(int argc, char *argv[]) {
try {
if (argc != 3) {
throw std::invalid_argument(
"Usage: sort <infile> <outfile>");
}
std::string infile(argv[1]);
std::string outfile(argv[2]);
if (!test(infile, outfile))
std::println("An error has occurred.");
} catch (const std::logic_error &e) {
std::println(stderr, "Logic error: {}", e.what());
} catch (const std::runtime_error &e) {
std::println(stderr, "Runtime error: {}", e.what());
} catch (const std::exception &e) {
std::println(stderr, "Another std::exception: {}", e.what());
} catch (...) {
std::println(stderr, "Unknown exception.");
}
return 0;
}
makefile:
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = sort.cpp
EXES = sort_g sort_l
all: $(EXES)
sort_g: $(SRC)
$(CPPG) $(CPPFLAGS) -o $@ $(SRC)
sort_l: $(SRC)
$(CPPL) $(CPPFLAGS) -o $@ $(SRC)
clean:
rm -f $(EXES)
基準データ:uniform1m.txt, normal1m.txtで測定する。
$ measure ./sort_g uniform1m.txt sorted_u1m_g.txt
length = 1000000
read time: 0.094046 [sec]
sort time: 0.118721 [sec]
write time: 0.093249 [sec]
======================================
Process exited with status: 0
total time: 0.312407 [sec]
mem size: 30100 [KB]
code size: 135 [KB]
$ measure ./sort_g normal1m.txt sorted_n1m_g.txt
length = 1000000
read time: 0.092892 [sec]
sort time: 0.117778 [sec]
write time: 0.092463 [sec]
======================================
Process exited with status: 0
total time: 0.304430 [sec]
mem size: 29700 [KB]
code size: 135 [KB]
$ measure ./sort_l uniform1m.txt sorted_u1m_l.txt
length = 1000000
read time: 0.093836 [sec]
sort time: 0.118416 [sec]
write time: 0.093618 [sec]
======================================
Process exited with status: 0
total time: 0.309713 [sec]
mem size: 30396 [KB]
code size: 116 [KB]
$ measure ./sort_l normal1m.txt sorted_n1m_l.txt
length = 1000000
read time: 0.091408 [sec]
sort time: 0.121176 [sec]
write time: 0.091928 [sec]
======================================
Process exited with status: 0
total time: 0.308297 [sec]
mem size: 30944 [KB]
code size: 116 [KB]
このプログラムで注意しなければならないのは、ファイル終了の例外 exception End_of_file を try .. with ... で処理しようとすると末尾再帰にならないということである。この場合、n = 1000000でスタックオーバーフローになる。これをmatch式で処理することで末尾再帰最適化が行われ、大きなファイルを読み込めるようになる。
整列はList.sortを用いる。
sort.ml:
open Printf
let read_data filename =
let ichan = open_in filename in
let rec read_lines acc =
match input_line ichan with
| line ->
let num = int_of_string line in
read_lines (num :: acc)
| exception End_of_file ->
close_in ichan;
List.rev acc in
read_lines []
let write_data filename lst =
let ochan = open_out filename in
List.iter (fun x -> fprintf ochan "%d\n" x) lst;
close_out ochan
let test infile outfile =
(* read file *)
let start = Unix.gettimeofday () in
let lst = read_data infile in
let end_ = Unix.gettimeofday () in
printf "length = %d\n" (List.length lst);
printf "read time: %9.6f [sec]\n" (end_ -. start);
(* sort *)
let start = Unix.gettimeofday () in
let sorted = List.sort compare lst in
let end_ = Unix.gettimeofday () in
printf "sort time: %9.6f [sec]\n" (end_ -. start);
(* write file *)
let start = Unix.gettimeofday () in
write_data outfile sorted;
let end_ = Unix.gettimeofday () in
printf "write time: %9.6f [sec]\n" (end_ -. start)
let () =
try
if Array.length Sys.argv <> 3 then
raise (Invalid_argument
"Usage: sort <infile> <outfile>")
else
let infile = Sys.argv.(1) in
let outfile = Sys.argv.(2) in
test infile outfile
with
| Invalid_argument msg ->
eprintf "Logic error: %s\n" msg
| Failure msg ->
eprintf "Runtime error: %s\n" msg
| exn ->
eprintf "Another exception: %s\n" (Printexc.to_string exn)
makefile:
OCAMLOPT = ocamlopt
OCAMLFLAGS = -O2
LIBPATH = -I +unix
LIBS = unix.cmxa
SRC = sort.ml
EXES = sort
all: $(EXES)
sort: $(SRC)
$(OCAMLOPT) $(OCAMLFLAGS) $(LIBPATH) -o $@ $(LIBS) $(SRC)
clean:
rm -f $(EXES) *.o *.cmx *.cmi
基準データ:uniform1m.txt, normal1m.txtで測定する。
$ measure ./sort uniform1m.txt sorted_u1m.txt
length = 1000000
read time: 0.299461 [sec]
sort time: 0.599918 [sec]
write time: 0.262236 [sec]
======================================
Process exited with status: 0
total time: 1.173211 [sec]
mem size: 92964 [KB]
code size: 3289 [KB]
$ measure ./sort normal1m.txt sorted_n1m.txt
length = 1000000
read time: 0.272055 [sec]
sort time: 0.618160 [sec]
write time: 0.246833 [sec]
======================================
Process exited with status: 0
total time: 1.146409 [sec]
mem size: 96408 [KB]
code size: 3289 [KB]
整列はsortを用いる。
sort.rkt
#lang racket
(require iso-printf)
(define (read-data filename)
(call-with-input-file filename
(lambda (port)
(let loop ([lst '()])
(let ([line (read-line port)])
(if (eof-object? line)
(reverse lst)
(loop (cons (string->number line) lst))))))))
(define (write-data filename lst)
(call-with-output-file filename #:exists 'replace
(lambda (port)
(for-each (lambda (num) (displayln num port))
lst))))
(define (test infile outfile)
; read file
(define start (current-inexact-milliseconds))
(define lst (read-data infile))
(define end (current-inexact-milliseconds))
(printf "length = %d\n" (length lst))
(printf "read time: %9.6f [sec]\n" (/ (- end start) 1000.0))
; sort
(set! start (current-inexact-milliseconds))
(define sorted (sort lst <))
(set! end (current-inexact-milliseconds))
(printf "sort time: %9.6f [sec]\n" (/ (- end start) 1000.0))
; write file
(set! start (current-inexact-milliseconds))
(write-data outfile sorted)
(set! end (current-inexact-milliseconds))
(printf "write time: %9.6f [sec]\n" (/ (- end start) 1000.0)))
(define (main args)
(with-handlers
([exn:fail? (λ (e) (eprintf "%s\n" (exn-message e)))]
[exn? (λ (e) (eprintf "Unexpected: %s\n" (exn-message e)))])
(cond
[(not (= (vector-length args) 2))
(error "Usage: sort <infile> <outfile>")]
[else
(let ([infile (vector-ref args 0)]
[outfile (vector-ref args 1)])
(test infile outfile))])))
(main (current-command-line-arguments))
基準データ:uniform1m.txt, normal1m.txtで測定する。
$ raco exe list5.rkt
$ measure ./sort uniform1m.txt sorted_u1m.txt
length = 1000000
read time: 0.775872 [sec]
sort time: 0.268304 [sec]
write time: 0.914904 [sec]
======================================
Process exited with status: 0
total time: 2.438084 [sec]
mem size: 166360 [KB]
code size: 12445 [KB]
$ measure ./sort normal1m.txt sorted_n1m.txt
length = 1000000
read time: 0.786007 [sec]
sort time: 0.263898 [sec]
write time: 0.943548 [sec]
======================================
Process exited with status: 0
total time: 2.463952 [sec]
mem size: 166484 [KB]
code size: 12445 [KB]
測定結果を表にまとめる。単位は秒[sec]。
測定項目 | GCC | Clang | OCaml | Racket | |
---|---|---|---|---|---|
一様分布 | read file | 0.094 | 0.094 | 0.299 | 0.776 |
sort | 0.119 | 0.118 | 0.600 | 0.268 | |
write file | 0.093 | 0.094 | 0.262 | 0.915 | |
正規分布 | read file | 0.093 | 0.091 | 0.272 | 0.786 |
sort | 0.118 | 0.121 | 0.618 | 0.264 | |
write file | 0.092 | 0.092 | 0.247 | 0.944 |
This document is licensed under the MIT License.
Copyright (C) 2024 Takeshi Umetani