2. 整列
著者:梅谷 武
各言語において整列の基本動作を比較する。
作成:2024-03-29
更新:2024-09-08
 n個のリストを整列する試験を行なうためにn個の乱数を生成してファイルに保存するプログラムを作成する。

C++

 乱数生成はC++で行なう。乱数データは一様分布と正規分布の二種類を用意する。
 random.cpp:
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <numeric>
#include <print>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
 
#ifdef NORMAL
int dist_type = 2;
#else
int dist_type = 1;
#endif
 
bool random_out(const std::string &filename, int n, int type) {
  std::vector<int> nums;
  std::random_device rd;
  std::mt19937 gen(rd());
  switch (type) {
  case 1: {
    std::uniform_int_distribution<int> dis(0, n * 100);
    for (int i = 0; i < n; ++i)
      nums.push_back(dis(gen));
    break;
  }
  case 2: {
    double mean = n * 50.0;
    double stddev = n * 10.0;
    std::normal_distribution<> dis(mean, stddev);
    for (int i = 0; i < n; ++i) {
      double raw = dis(gen);
      int val = std::clamp(static_cast<int>(raw), 0, n * 100);
      nums.push_back(val);
    }
    break;
  }
  default:
    throw std::runtime_error("Invalid distribution type selected.");
    return false;
  }
  // statistics
  double mean = std::accumulate(nums.begin(), nums.end(), 0l);
  mean /= static_cast<double>(n);
  double sum = 0.0;
  for (int val : nums) {
    sum += (static_cast<double>(val) - mean) * (static_cast<double>(val) - mean);
  }
  double variance = sum / n;
  double sd = sqrt(variance);
  int min = *std::min_element(nums.begin(), nums.end());
  int max = *std::max_element(nums.begin(), nums.end());
  std::println("min, max = {:>11}, {:>11}", min, max);
  std::println("mean, sd = {:11.1f}, {:11.1f}", mean, sd);
  // write file
  std::ofstream outfile(filename);
  if (!outfile) {
    throw std::runtime_error("Could not open file for writing.");
    return false;
  }
  std::ostringstream buf;
  for (int num : nums)
    buf << num << "\n";
  outfile << buf.str();
  outfile.close();
  return true;
}
 
int main(int argc, char *argv[]) {
  try {
    if (argc != 3) {
#ifdef NORMAL
      throw std::invalid_argument(
          "Usage: normal <filename> <datasize>");
#else
      throw std::invalid_argument(
          "Usage: uniform <filename> <datasize>");
#endif
    }
    std::string filename(argv[1]);
    int n = std::stol(argv[2]);
    if (n <= 0) {
      throw std::invalid_argument(
          "Datasize must be a positive integer.");
    }
    if (random_out(filename, n, dist_type))
      std::println("{} data file created: {}", n, filename);
    else
      std::println("An error has occurred.");
  } catch (const std::logic_error &e) {
    std::println(stderr, "Logic error: {}", e.what());
  } catch (const std::runtime_error &e) {
    std::println(stderr, "Runtime error: {}", e.what());
  } catch (const std::exception &e) {
    std::println(stderr, "Another std::exception: {}", e.what());
  } catch (...) {
    std::println(stderr, "Unknown exception.");
  }
  return 0;
}


 makefile:
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = random.cpp
EXES = uniform normal
 
all: $(EXES)
 
uniform: $(SRC)
	$(CPPG) $(CPPFLAGS) -o $@ $(SRC)
 
normal: $(SRC)
	$(CPPG) $(CPPFLAGS) -DNORMAL -o $@ $(SRC)
 
clean:
	rm -f $(EXES)


 1000000個の基準データ:uniform1m.txt, normal1m.txtを作成し、それを固定して各言語を比較する。
$ uniform uniform1m.txt 1000000
min, max =         280,    99999906
mean, sd =  50012261.7,  28868234.9
1000000 data file created: uniform1m.txt
$ normal normal1m.txt 1000000
min, max =     2853339,    98883771
mean, sd =  49982596.5,   9992196.7
1000000 data file created: normal1m.txt
 指定した入力ファイルから整数列を読み込んでリストに格納し、それを昇順整列し、指定した出力ファイルに結果を書き込む。

C++

 std::sortで整列する。
 sort.cpp
#include <chrono>
#include <deque>
#include <fstream>
#include <iostream>
#include <print>
#include <sstream>
#include <stdexcept>
 
bool test(const std::string &infile, const std::string &outfile) {
  std::deque<long> lst;
  // read file
  auto start = std::chrono::high_resolution_clock::now();
  std::ifstream fin(infile);
  if (!fin) {
    throw std::runtime_error("Could not open file for reading.");
    return false;
  }
  int num;
  while (fin >> num)
    lst.push_back(num);
  fin.close();
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> duration = end - start;
  std::println("length = {}", lst.size());
  std::println("read time:  {:9.6f} [sec]", duration.count());
  // sort
  start = std::chrono::high_resolution_clock::now();
  std::sort(lst.begin(), lst.end());
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("sort time:  {:9.6f} [sec]", duration.count());
  // write file
  start = std::chrono::high_resolution_clock::now();
  std::ofstream fout(outfile);
  if (!fout) {
    throw std::runtime_error("Could not open file for writing.");
    return false;
  }
  std::ostringstream buf;
  for (long num : lst)
    buf << num << "\n";
  fout << buf.str();
  fout.close();
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("write time: {:9.6f} [sec]", duration.count());
  return true;
}
 
int main(int argc, char *argv[]) {
  try {
    if (argc != 3) {
      throw std::invalid_argument(
          "Usage: sort <infile> <outfile>");
    }
    std::string infile(argv[1]);
    std::string outfile(argv[2]);
    if (!test(infile, outfile))
      std::println("An error has occurred.");
  } catch (const std::logic_error &e) {
    std::println(stderr, "Logic error: {}", e.what());
  } catch (const std::runtime_error &e) {
    std::println(stderr, "Runtime error: {}", e.what());
  } catch (const std::exception &e) {
    std::println(stderr, "Another std::exception: {}", e.what());
  } catch (...) {
    std::println(stderr, "Unknown exception.");
  }
  return 0;
}


 makefile:
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = sort.cpp
EXES = sort_g sort_l
 
all: $(EXES)
 
sort_g: $(SRC)
	$(CPPG) $(CPPFLAGS) -o $@ $(SRC)
 
sort_l: $(SRC)
	$(CPPL) $(CPPFLAGS) -o $@ $(SRC)
 
clean:
	rm -f $(EXES)


 基準データ:uniform1m.txt, normal1m.txtで測定する。
$ measure ./sort_g uniform1m.txt sorted_u1m_g.txt
length = 1000000
read time:   0.094046 [sec]
sort time:   0.118721 [sec]
write time:  0.093249 [sec]
======================================
Process exited with status: 0
total time:  0.312407 [sec]
mem  size:      30100 [KB]
code size:        135 [KB]
$ measure ./sort_g normal1m.txt sorted_n1m_g.txt
length = 1000000
read time:   0.092892 [sec]
sort time:   0.117778 [sec]
write time:  0.092463 [sec]
======================================
Process exited with status: 0
total time:  0.304430 [sec]
mem  size:      29700 [KB]
code size:        135 [KB]
$ measure ./sort_l uniform1m.txt sorted_u1m_l.txt
length = 1000000
read time:   0.093836 [sec]
sort time:   0.118416 [sec]
write time:  0.093618 [sec]
======================================
Process exited with status: 0
total time:  0.309713 [sec]
mem  size:      30396 [KB]
code size:        116 [KB]
$ measure ./sort_l normal1m.txt sorted_n1m_l.txt
length = 1000000
read time:   0.091408 [sec]
sort time:   0.121176 [sec]
write time:  0.091928 [sec]
======================================
Process exited with status: 0
total time:  0.308297 [sec]
mem  size:      30944 [KB]
code size:        116 [KB]
 このプログラムで注意しなければならないのは、ファイル終了の例外 exception End_of_file を try .. with ... で処理しようとすると末尾再帰にならないということである。この場合、n = 1000000でスタックオーバーフローになる。これをmatch式で処理することで末尾再帰最適化が行われ、大きなファイルを読み込めるようになる。
 整列はList.sortを用いる。
 sort.ml:
open Printf
 
let read_data filename =
  let ichan = open_in filename in
  let rec read_lines acc =
    match input_line ichan with
    | line ->
      let num = int_of_string line in
      read_lines (num :: acc)
    | exception End_of_file ->
      close_in ichan;
      List.rev acc in
  read_lines []
 
let write_data filename lst =
  let ochan = open_out filename in
  List.iter (fun x -> fprintf ochan "%d\n" x) lst;
  close_out ochan
 
let test infile outfile =
  (* read file *)
  let start = Unix.gettimeofday () in
  let lst = read_data infile in
  let end_ = Unix.gettimeofday () in
  printf "length = %d\n" (List.length lst);
  printf "read time:  %9.6f [sec]\n" (end_ -. start);
  (* sort *)
  let start = Unix.gettimeofday () in
  let sorted = List.sort compare lst in
  let end_ = Unix.gettimeofday () in
  printf "sort time:  %9.6f [sec]\n" (end_ -. start);
  (* write file *)
  let start = Unix.gettimeofday () in
  write_data outfile sorted;
  let end_ = Unix.gettimeofday () in
  printf "write time: %9.6f [sec]\n" (end_ -. start)
 
let () =
  try
    if Array.length Sys.argv <> 3 then
      raise (Invalid_argument
        "Usage: sort <infile> <outfile>")
    else
      let infile =  Sys.argv.(1) in
      let outfile =  Sys.argv.(2) in
      test infile outfile
  with
    | Invalid_argument msg ->
        eprintf "Logic error: %s\n" msg
    | Failure msg ->
        eprintf "Runtime error: %s\n" msg
    | exn ->
        eprintf "Another exception: %s\n" (Printexc.to_string exn)


 makefile:
OCAMLOPT = ocamlopt
OCAMLFLAGS = -O2
LIBPATH = -I +unix
LIBS = unix.cmxa
SRC = sort.ml
EXES = sort
 
all: $(EXES)
 
sort: $(SRC)
	$(OCAMLOPT) $(OCAMLFLAGS) $(LIBPATH) -o $@ $(LIBS) $(SRC)
 
clean:
	rm -f $(EXES) *.o *.cmx *.cmi


 基準データ:uniform1m.txt, normal1m.txtで測定する。
$ measure ./sort uniform1m.txt sorted_u1m.txt
length = 1000000
read time:   0.299461 [sec]
sort time:   0.599918 [sec]
write time:  0.262236 [sec]
======================================
Process exited with status: 0
total time:  1.173211 [sec]
mem  size:      92964 [KB]
code size:       3289 [KB]
$ measure ./sort normal1m.txt sorted_n1m.txt
length = 1000000
read time:   0.272055 [sec]
sort time:   0.618160 [sec]
write time:  0.246833 [sec]
======================================
Process exited with status: 0
total time:  1.146409 [sec]
mem  size:      96408 [KB]
code size:       3289 [KB]
 整列はsortを用いる。
 sort.rkt
#lang racket
(require iso-printf)
 
(define (read-data filename)
  (call-with-input-file filename
    (lambda (port)
      (let loop ([lst '()])
        (let ([line (read-line port)])
          (if (eof-object? line)
              (reverse lst)
              (loop (cons (string->number line) lst))))))))
 
(define (write-data filename lst)
  (call-with-output-file filename #:exists 'replace
    (lambda (port)
      (for-each (lambda (num) (displayln num port))
                lst))))
 
(define (test infile outfile)
  ; read file
  (define start (current-inexact-milliseconds))
  (define lst (read-data infile))
  (define end (current-inexact-milliseconds))
  (printf "length = %d\n" (length lst))
  (printf "read time:  %9.6f [sec]\n" (/ (- end start) 1000.0))
  ; sort
  (set! start (current-inexact-milliseconds))
  (define sorted (sort lst <))
  (set! end (current-inexact-milliseconds))
  (printf "sort time:  %9.6f [sec]\n" (/ (- end start) 1000.0))
   ; write file
  (set! start (current-inexact-milliseconds))
  (write-data outfile sorted)
  (set! end (current-inexact-milliseconds))
  (printf "write time: %9.6f [sec]\n" (/ (- end start) 1000.0)))
 
(define (main args)
  (with-handlers
    ([exn:fail? (λ (e) (eprintf "%s\n" (exn-message e)))]
     [exn? (λ (e) (eprintf "Unexpected: %s\n" (exn-message e)))])
  (cond
    [(not (= (vector-length args) 2))
      (error "Usage: sort <infile> <outfile>")]
    [else
      (let ([infile (vector-ref args 0)]
            [outfile (vector-ref args 1)])
        (test infile outfile))])))
 
(main (current-command-line-arguments))


 基準データ:uniform1m.txt, normal1m.txtで測定する。
$ raco exe list5.rkt
$ measure ./sort uniform1m.txt sorted_u1m.txt
length = 1000000
read time:   0.775872 [sec]
sort time:   0.268304 [sec]
write time:  0.914904 [sec]
======================================
Process exited with status: 0
total time:  2.438084 [sec]
mem  size:     166360 [KB]
code size:      12445 [KB]
$ measure ./sort normal1m.txt sorted_n1m.txt
length = 1000000
read time:   0.786007 [sec]
sort time:   0.263898 [sec]
write time:  0.943548 [sec]
======================================
Process exited with status: 0
total time:  2.463952 [sec]
mem  size:     166484 [KB]
code size:      12445 [KB]
 測定結果を表にまとめる。単位は秒[sec]。
測定項目 GCC Clang OCaml Racket
一様分布 read file 0.094 0.094 0.299 0.776
sort 0.119 0.118 0.600 0.268
write file 0.093 0.094 0.262 0.915
正規分布 read file 0.093 0.091 0.272 0.786
sort 0.118 0.121 0.618 0.264
write file 0.092 0.092 0.247 0.944
This document is licensed under the MIT License.
Copyright (C) 2024 Takeshi Umetani