6. 配列
著者:梅谷 武
各言語における配列の扱いを調べる。
作成:2024-06-05
更新:2024-09-16
 関数型プログラミングにおいては副作用を排除し、状態を持たない純粋関数だけでプログラムを構成することが基本原則とされる。このため関数型言語における変数(variable)は最初に初期値を設定した後、最後までその値は不変(immutable)であることが求められる。これは命令型言語に慣れた人間にとってはかなり大きなパラダイムシフトであり、実際に関数型言語でプログラミングするとき大いに戸惑う。そして副作用の排除には予測可能性、参照透過性、テストとデバッグの容易さ、並列処理における競合リスクの低下等さまざまな利点があるという解説を読んで計算機科学の奥の深さを実感させられる。
 命令型言語をすでに習得している初心者はどうしても可変(mutable)な変数を使いたくなり、OCamlやRacketにおいてはこのような要求に応えるためなのかどうか可変(mutable)なデータ構造も提供されている。ここではC++におけるstd::vectorとOCamlにおける可変配列Array、Racketにおける可変配列vectorの基本動作を比較する。

C++

 Cの配列を使うことで最も高い性能やメモリ効率を得ることができるが、Modern C++では安全性や機能性の観点からstd::vectorを使うことが推奨されている。
 配列の実装にはModule Arrayを用いる。
 配列の実装にはvectorを用いる。vectorについての概説はThe Racket Guide>3 Built-in Datatypes>3.9 Vectors、詳細仕様はThe Racket Reference>4 Datatypes>4.12 Vectorsにある。
  1. 代入:長さnの配列src,dstを生成し、src[i] = i, i=0,...,n-1.
  2. 複写:dst = src
  3. 写像:dst = f(src), f(x) = x + x.
  4. 自己写像:src = f(src), f(x) = x + x.
  5. 左重畳:suml = (vector-foldl (λ (acc x) (+ acc x)) 0 src)
  6. 右重畳:sumr = (vector-foldr (λ (x acc) (+ x acc)) 0 src)
 C++とOCamlは64ビット整数配列、Racketは整数配列を用いる。

C++

 array.cpp:
#include <algorithm>
#include <chrono>
#include <fstream>
#include <iostream>
#include <numeric>
#include <print>
#include <stdexcept>
#include <string>
#include <vector>
 
void test(long n) {
  // assign
  auto start = std::chrono::high_resolution_clock::now();
  std::vector<long> src(n, 0l), dst(n, -1l);
  for (long i = 0l; i < n; ++i)
    src[i] = i;
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> duration = end - start;
  std::println("length = {}", n);
  std::println("src: {} ---> {}", src[0], src[n - 1]);
  std::println("assign time:{:9.6f} [sec]", duration.count());
  // copy
  start = std::chrono::high_resolution_clock::now();
  std::copy(src.begin(), src.end(), dst.begin());
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("dst: {} ---> {}", dst[0], dst[n - 1]);
  std::println("copy time:  {:9.6f} [sec]", duration.count());
  // map
  start = std::chrono::high_resolution_clock::now();
  std::transform(src.begin(), src.end(), dst.begin(),
                 [](int x) { return x + x; });
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("dst: {} ---> {}", dst[0], dst[n - 1]);
  std::println("map time:   {:9.6f} [sec]", duration.count());
  // self-map
  start = std::chrono::high_resolution_clock::now();
  std::transform(src.begin(), src.end(), src.begin(),
                 [](int x) { return x + x; });
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("src: {} ---> {}", src[0], src[n - 1]);
  std::println("self-map:   {:9.6f} [sec]", duration.count());
  // foldl
  start = std::chrono::high_resolution_clock::now();
  long suml = std::accumulate(src.begin(), src.end(), 0l,
                              [](long x, long acc) { return (x + acc); });
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("foldl time: {:9.6f} [sec]", duration.count());
  // foldr
  start = std::chrono::high_resolution_clock::now();
  long sumr = std::accumulate(src.rbegin(), src.rend(), 0l,
                              [](long x, long acc) { return (x + acc); });
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("foldr time: {:9.6f} [sec]", duration.count());
  if (suml == sumr)
    std::println("sum = {}", suml);
  else
    std::println("suml ≠ sumr", suml);
}
 
int main(int argc, char *argv[]) {
  try {
    if (argc != 2) {
      throw std::invalid_argument(
          "Usage: array <non-negative integer>");
    }
    long n = std::stol(argv[1]);
    if (n < 0l) {
      throw std::invalid_argument(
          "The argument must be a non-negative integer.");
    }
    test(n);
  } catch (const std::logic_error &e) {
    std::println(stderr, "Logic error: {}", e.what());
  } catch (const std::runtime_error &e) {
    std::println(stderr, "Runtime error: {}", e.what());
  } catch (const std::exception &e) {
    std::println(stderr, "Another std::exception: {}", e.what());
  } catch (...) {
    std::println(stderr, "Unknown exception.");
  }
  return 0;
}


 makefile:
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = array.cpp
EXES = array_g array_l
 
all: $(EXES)
 
array_g: $(SRC)
	$(CPPG) $(CPPFLAGS) -o $@ $(SRC)
 
array_l: $(SRC)
	$(CPPL) $(CPPFLAGS) -o $@ $(SRC)
 
clean:
	rm -f $(EXES)


 n = 10000000で測定する。
$ measure ./array_g 10000000
length = 10000000
src: 0 ---> 9999999
assign time: 0.071241 [sec]
dst: 0 ---> 9999999
copy time:   0.022285 [sec]
dst: 0 ---> 19999998
map time:    0.026023 [sec]
src: 0 ---> 19999998
self-map:    0.014224 [sec]
foldl time:  0.009649 [sec]
foldr time:  0.009653 [sec]
sum = 99999990000000
======================================
Process exited with status: 0
total time:  0.157648 [sec]
mem  size:     159356 [KB]
code size:        128 [KB]
$ measure ./array_l 10000000
length = 10000000
src: 0 ---> 9999999
assign time: 0.070455 [sec]
dst: 0 ---> 9999999
copy time:   0.016643 [sec]
dst: 0 ---> 19999998
map time:    0.026382 [sec]
src: 0 ---> 19999998
self-map:    0.016053 [sec]
foldl time:  0.006832 [sec]
foldr time:  0.007674 [sec]
sum = 99999990000000
======================================
Process exited with status: 0
total time:  0.147391 [sec]
mem  size:     159308 [KB]
code size:        101 [KB]
 Array.copy、Array.mapでは新しい配列を生成する。
 array.ml:
open Printf
 
let test n =
  (* assign *)
  let start = Unix.gettimeofday () in
  let src = Array.make n 0 in
  for i = 0 to n - 1 do
    src.(i) <- i
  done;
  let end_ = Unix.gettimeofday () in
  printf "length = %d\n" n;
  printf "src: %d ---> %d\n" src.(0) src.(n-1);
  printf "assign time:%9.6f [sec]\n" (end_ -. start);
  (* copy *)
  let start = Unix.gettimeofday () in
  let dst = Array.copy src in
  let end_ = Unix.gettimeofday () in
  printf "dst: %d ---> %d\n" dst.(0) dst.(n-1);
  printf "copy time:  %9.6f [sec]\n" (end_ -. start);
  (* map *)
  let start = Unix.gettimeofday () in
  let dst = Array.map (fun x -> x + x) src in
  let end_ = Unix.gettimeofday () in
  printf "dst: %d ---> %d\n" dst.(0) dst.(n-1);
  printf "map time:   %9.6f [sec]\n" (end_ -. start);
  (* self-map *)
  let start = Unix.gettimeofday () in
  Array.iteri (fun i x -> src.(i) <- x + x) src;
  let end_ = Unix.gettimeofday () in
  printf "src: %d ---> %d\n" src.(0) src.(n-1);
  printf "self-map:   %9.6f [sec]\n" (end_ -. start);
  (* foldl *)
  let start = Unix.gettimeofday () in
  let suml = Array.fold_left (fun acc x -> acc + x) 0 src in
  let end_ = Unix.gettimeofday () in
  printf "foldl time: %9.6f [sec]\n" (end_ -. start);
  (* foldr *)
  let start = Unix.gettimeofday () in
  let sumr = Array.fold_right (fun x acc -> x + acc) src 0 in
  let end_ = Unix.gettimeofday () in
  printf "foldr time: %9.6f [sec]\n" (end_ -. start);
  if (suml = sumr) then
    printf "sum = %d\n" suml
  else
    printf "suml ≠ sumr\n"
 
let () =
  try
    if Array.length Sys.argv <> 2 then
      raise (Invalid_argument
        "Usage: array <positive integer>")
    else
      let n = int_of_string Sys.argv.(1) in
      if n <= 0 then
        raise (Invalid_argument
          "The argument must be a positive integer.")
      else
        test n
  with
    | Invalid_argument msg ->
        eprintf "Logic error: %s\n" msg
    | Failure msg ->
        eprintf "Runtime error: %s\n" msg
    | exn ->
        eprintf "Another exception: %s\n" (Printexc.to_string exn)


 makefile:
OCAMLOPT = ocamlopt
OCAMLFLAGS = -O2
LIBPATH = -I +unix
LIBS = unix.cmxa
SRC = array.ml
EXES = array
 
all: $(EXES)
 
array: $(SRC)
	$(OCAMLOPT) $(OCAMLFLAGS) $(LIBPATH) -o $@ $(LIBS) $(SRC)
 
clean:
	rm -f $(EXES) *.o *.cmx *.cmi


 n = 10000000で測定する。
$ measure ./array 10000000
length = 10000000
src: 0 ---> 9999999
assign time: 0.060781 [sec]
dst: 0 ---> 9999999
copy time:   0.072694 [sec]
dst: 0 ---> 19999998
map time:    0.125711 [sec]
src: 0 ---> 19999998
self-map:    0.036473 [sec]
foldl time:  0.030570 [sec]
foldr time:  0.039808 [sec]
sum = 99999990000000
======================================
Process exited with status: 0
total time:  0.380341 [sec]
mem  size:     238192 [KB]
code size:       3291 [KB]
 (vector-copy! dst 0 src)では既存の配列dstに破壊的代入をする。vector-mapは新しい配列を生成する。
 array.rkt:
#lang racket
(require iso-printf)
 
(define (vector-foldl f init vec)
  (let loop ([acc init] [i 0])
    (if (< i (vector-length vec))
        (loop (f acc (vector-ref vec i)) (add1 i))
        acc)))
 
(define (vector-foldr f init vec)
  (let loop ([acc init] [i (sub1 (vector-length vec))])
    (if (>= i 0)
        (loop (f (vector-ref vec i) acc) (sub1 i))
        acc)))
 
(define (test n)
  ; assign
  (define start (current-inexact-milliseconds))
  (define src (make-vector n 0))
  (define dst (make-vector n -1))
  (for ([i (in-range n)])
    (vector-set! src i i))
  (define end (current-inexact-milliseconds))
  (printf "length = %d\n" n)
  (printf "src: %d ---> %d\n" (vector-ref src 0) (vector-ref src (- n 1)))
  (printf "assign time:%9.6f [sec]\n" (/ (- end start) 1000.0))
  ; copy
  (set! start (current-inexact-milliseconds))
  (vector-copy! dst 0 src)
  (set! end (current-inexact-milliseconds))
  (printf "dst: %d ---> %d\n" (vector-ref dst 0) (vector-ref dst (- n 1)))
  (printf "copy time:  %9.6f [sec]\n" (/ (- end start) 1000.0))
  ; map
  (set! start (current-inexact-milliseconds))
  (set! dst (vector-map (λ (i) (+ i i)) src))
  (set! end (current-inexact-milliseconds))
  (printf "dst: %d ---> %d\n" (vector-ref dst 0) (vector-ref dst (- n 1)))
  (printf "map time:   %9.6f [sec]\n" (/ (- end start) 1000.0))
  ; self-map
  (set! start (current-inexact-milliseconds))
  (vector-map! (λ (i) (+ i i)) src)
  (set! end (current-inexact-milliseconds))
  (printf "src: %d ---> %d\n" (vector-ref src 0) (vector-ref src (- n 1)))
  (printf "self-map:   %9.6f [sec]\n" (/ (- end start) 1000.0))
  ; foldl
  (set! start (current-inexact-milliseconds))
  (define suml (vector-foldl (λ (acc x) (+ acc x)) 0 src))
  (set! end (current-inexact-milliseconds))
  (printf "foldl time: %9.6f [sec]\n" (/ (- end start) 1000.0))
  ; foldr
  (set! start (current-inexact-milliseconds))
  (define sumr (vector-foldr (λ (x acc) (+ x acc)) 0 src))
  (set! end (current-inexact-milliseconds))
  (printf "foldr time: %9.6f [sec]\n" (/ (- end start) 1000.0))
  (if (= suml sumr)
    (printf "sum = %d\n" suml)
    (printf "suml ≠ sumr\n")))
 
(define (main args)
  (with-handlers
    ([exn:fail? (λ (e) (eprintf "%s\n" (exn-message e)))]
     [exn? (λ (e) (eprintf "Unexpected: %s\n" (exn-message e)))])
  (cond
    [(not (= (vector-length args) 1))
      (error "Usage: array <positive integer>")]
    [else
      (let ([n (string->number (vector-ref args 0))])
        (cond
          [(not (integer? n))
            (error "The argument must be an integer.")]
          [(<= n 0)
            (error "The argument must be a positive integer.")]
          [else
            (test n)]))])))
 
(main (current-command-line-arguments))


 n = 10000000で測定する。
$ raco exe array.rkt
$ measure ./array 10000000
length = 10000000
src: 0 ---> 9999999
assign time: 0.246857 [sec]
dst: 0 ---> 9999999
copy time:   0.032239 [sec]
dst: 0 ---> 19999998
map time:    0.577591 [sec]
src: 0 ---> 19999998
self-map:    0.350188 [sec]
foldl time:  0.063333 [sec]
foldr time:  0.058330 [sec]
sum = 99999990000000
======================================
Process exited with status: 0
total time:  1.789718 [sec]
mem  size:     598656 [KB]
code size:      12447 [KB]
 測定結果を表にまとめる。単位は秒[sec]。
測定項目 GCC Clang OCaml Racket
assign 0.071 0.070 0.061 0.247
copy 0.022 0.017 0.073 0.032
map 0.026 0.026 0.126 0.578
self-map 0.014 0.016 0.036 0.350
foldl 0.010 0.007 0.031 0.063
foldr 0.010 0.008 0.040 0.058
This document is licensed under the MIT License.
Copyright (C) 2024 Takeshi Umetani