Diary

Diary?

学生の研究日記だったらしいです。多分。

開発日記。

オススメの本(頂いた本):

いちばんあたらしいの2017 5/23 16:57

_23(Tue)

Intel CPU で連続アクセスは速い、という話。

メモリをざらっとなめる処理があるとする。

	/* Walk vals[0..s-1] in index order; every element whose flags field is
	 * nonzero is handed to do_something_wrong(). */
	for (i=0; i<s; i++) {
		RVALUE *val = &vals[i];
		if (val->flags) {
		    do_something_wrong(val);
		}
	}

vals にある val をイチイチ見ていって、flags を見て、0 以外ならなんかする、という処理。

これを高速化しようと思う。よく見ると、ある条件だったら、この分岐をスキップできることがわかったとする。例えば、一つおきに見るだけで良い、ということがわかったとする。

	/* Same walk, but skip_func(i) is consulted first: vals[i] is only read
	 * (and its flags tested) when skip_func returns 0 for that index. */
	for (i=0; i<s; i++) {
	    if (!(*skip_func)(i)) {
		RVALUE *val = &vals[i];
		if (val->flags) {
		    do_something_wrong(val);
		}
	    }
	}

skip_func が偽(0)を返した時だけ flags を見るので、スキップした分だけメモリアクセスが減り、速くなりそうだ。skip_func は実験のために入れ替え可能にしておく。これをまとめたプログラムが以下。

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * One element of the array scanned by the benchmark:
 * five 64-bit words, i.e. 40 bytes per element.
 */
typedef struct {
    uint64_t flags;     /* tested by the scan loop; nonzero triggers do_something_wrong() */
    uint64_t data[4];   /* never accessed in this program */
} RVALUE;

/*
 * Report an element whose flags field was nonzero by printing its address
 * to stderr.
 *
 * Declared noinline, presumably so that the call in the scan loop remains
 * a real function call instead of being folded into the loop body.
 */
__attribute__((noinline))
static void
do_something_wrong(RVALUE *val)
{
    /* The %p conversion requires a void * argument; passing any other
     * pointer type is undefined behaviour, hence the explicit cast. */
    fprintf(stderr, "do_something_wrong: %p\n", (void *)val);
}

/*
 * Skip predicates for the scan loop.  Each returns nonzero when index n
 * should be skipped.  For the non-negative indices the loop passes in,
 * skip_K skips the first K indices out of every group of 10
 * (skip_0: none, skip_a: all).
 */

/* 0/10, none */
static int
skip_0(int n)
{
    const int slot = n % 10;
    return slot < 0;
}

/* 1/10 */
static int
skip_1(int n)
{
    const int slot = n % 10;
    return slot < 1;
}

/* 2/10 */
static int
skip_2(int n)
{
    const int slot = n % 10;
    return slot < 2;
}

/* 3/10 */
static int
skip_3(int n)
{
    const int slot = n % 10;
    return slot < 3;
}

/* 4/10 */
static int
skip_4(int n)
{
    const int slot = n % 10;
    return slot < 4;
}

/* 5/10 */
static int
skip_5(int n)
{
    const int slot = n % 10;
    return slot < 5;
}

/* 6/10 */
static int
skip_6(int n)
{
    const int slot = n % 10;
    return slot < 6;
}

/* 7/10 */
static int
skip_7(int n)
{
    const int slot = n % 10;
    return slot < 7;
}

/* 8/10 */
static int
skip_8(int n)
{
    const int slot = n % 10;
    return slot < 8;
}

/* 9/10 */
static int
skip_9(int n)
{
    const int slot = n % 10;
    return slot < 9;
}

/* 10/10, all */
static int
skip_a(int n)
{
    const int slot = n % 10;
    return slot < 10;
}

int
main(int argc, char *argv[])
{
    int i, j;
    const int s = 1024 * 1024 * 10; /* 10M */
    RVALUE *vals = (RVALUE *)calloc(sizeof(RVALUE), s);
    int (*skip_func)(int);

    if (argc < 2) {
	exit(1);
    }

    skip_func = NULL;
    switch (argv[1][0]) {
      case '0':
	skip_func = skip_0;
	break;
      case '1':
	skip_func = skip_1;
	break;
      case '2':
	skip_func = skip_2;
	break;
      case '3':
	skip_func = skip_3;
	break;
      case '4':
	skip_func = skip_4;
	break;
      case '5':
	skip_func = skip_5;
	break;
      case '6':
	skip_func = skip_6;
	break;
      case '7':
	skip_func = skip_7;
	break;
      case '8':
	skip_func = skip_8;
	break;
      case '9':
	skip_func = skip_9;
	break;
      case 'a':
	skip_func = skip_a;
	break;
      default:
	fprintf(stderr, "unsupported: %s\n", argv[1]);
	exit(1);
    }

    for (j=0; j<100; j++) {
	for (i=0; i<s; i++) {
	    if (!(*skip_func)(i)) {
		RVALUE *val = &vals[i];
		if (val->flags) {
		    do_something_wrong(val);
		}
	    }
	}
    }
}

0 を渡すと skip_0() が使われる。これは、必ず 0 を返すので、つまり skip しない。1 だと、10 回中 1 回 skip する。... 9 だと 10 回中 9 回スキップする。a だと、必ず 1 を返すので全部 skip する。

さて、実験結果を示す。

model name      : Intel(R) Core(TM) i5-3380M CPU @ 2.90GHz

       user     system      total        real
0  0.000000   0.000000   3.200000 (  3.213057)
1  0.000000   0.000000   3.980000 (  3.988034)
2  0.000000   0.000000   4.690000 (  4.680758)
3  0.000000   0.000000   5.370000 (  5.374492)
4  0.000000   0.000000   6.730000 (  6.750303)
5  0.000000   0.000000   7.080000 (  7.078628)
6  0.000000   0.000000   6.940000 (  6.946066)
7  0.000000   0.000000   6.210000 (  6.216578)
8  0.000000   0.000000   8.360000 (  8.358197)
9  0.000000   0.000000   5.480000 (  5.483612)
a  0.010000   0.000000   1.970000 (  1.965609)
model name      : Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz

       user     system      total        real
0  0.000000   0.000000   2.200000 (  2.205095)
1  0.000000   0.000000   2.310000 (  2.317280)
2  0.000000   0.000000   2.460000 (  2.460013)
3  0.000000   0.000000   2.540000 (  2.538290)
4  0.000000   0.000000   2.650000 (  2.649510)
5  0.000000   0.000000   2.760000 (  2.764821)
6  0.000000   0.000000   3.280000 (  3.283949)
7  0.000000   0.000000   4.120000 (  4.124645)
8  0.000000   0.000000   7.710000 (  7.709284)
9  0.000000   0.000000   3.730000 (  3.722011)
a  0.000000   0.000000   1.440000 (  1.450049)
ラズパイ3
       user     system      total        real
0  0.000000   0.000000   5.100000 (  5.119621)
1  0.000000   0.010000   5.660000 (  5.661095)
2  0.000000   0.000000   5.930000 (  5.938469)
3  0.000000   0.000000   6.430000 (  6.439446)
4  0.000000   0.000000   6.360000 (  6.391132)
5  0.000000   0.000000   5.840000 (  5.852021)
6  0.000000   0.000000   5.660000 (  5.674228)
7  0.000000   0.000000   6.890000 (  6.894989)
8  0.000000   0.000000   7.550000 (  7.564158)
9  0.000000   0.000000   6.390000 (  6.446177)
a  0.000000   0.010000   3.310000 (  3.308592)

どれも、全部アクセスしたほうが速い(もちろん、全部スキップが一番速いが、それは比較対象ということで)。

i7-6700 を使って、perf を回してみる。

./a.out 1

 Performance counter stats for './a.out 1':

       2327.312108      task-clock (msec)         #    1.000 CPUs utilized
                 3      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               754      page-faults               #    0.324 K/sec
     8,552,271,698      cycles                    #    3.675 GHz                      (30.39%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    25,136,427,035      instructions              #    2.94  insns per cycle          (38.13%)
     5,154,158,059      branches                  # 2214.640 M/sec                    (38.29%)
            23,945      branch-misses             #    0.00% of all branches          (38.86%)
     1,964,678,774      L1-dcache-loads           #  844.184 M/sec                    (38.97%)
       667,506,576      L1-dcache-load-misses     #   33.98% of all L1-dcache hits    (38.94%)
        27,388,843      LLC-loads                 #   11.768 M/sec                    (31.10%)
        18,668,574      LLC-load-misses           #  136.32% of all LL-cache hits     (31.05%)
   <not supported>      L1-icache-loads
           209,135      L1-icache-load-misses     #    0.090 M/sec                    (30.99%)
     1,995,968,384      dTLB-loads                #  857.628 M/sec                    (30.94%)
               517      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.78%)
               108      iTLB-loads                #    0.046 K/sec                    (30.60%)
                56      iTLB-load-misses          #   51.85% of all iTLB cache hits   (30.43%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       2.327875472 seconds time elapsed

./a.out 2

 Performance counter stats for './a.out 2':

       2479.055604      task-clock (msec)         #    1.000 CPUs utilized
                 2      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               753      page-faults               #    0.304 K/sec
     9,129,307,476      cycles                    #    3.683 GHz                      (30.37%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    24,642,643,947      instructions              #    2.70  insns per cycle          (38.56%)
     4,996,575,860      branches                  # 2015.516 M/sec                    (38.66%)
            24,533      branch-misses             #    0.00% of all branches          (38.76%)
     1,863,534,823      L1-dcache-loads           #  751.712 M/sec                    (38.86%)
       665,528,308      L1-dcache-load-misses     #   35.71% of all L1-dcache hits    (38.96%)
        30,044,469      LLC-loads                 #   12.119 M/sec                    (31.12%)
        21,559,455      LLC-load-misses           #  143.52% of all LL-cache hits     (31.07%)
   <not supported>      L1-icache-loads
           212,872      L1-icache-load-misses     #    0.086 M/sec                    (31.02%)
     1,893,886,927      dTLB-loads                #  763.955 M/sec                    (30.95%)
               971      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.78%)
               376      iTLB-loads                #    0.152 K/sec                    (30.62%)
                39      iTLB-load-misses          #   10.37% of all iTLB cache hits   (30.46%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       2.479202848 seconds time elapsed

./a.out 3

 Performance counter stats for './a.out 3':

       2537.381335      task-clock (msec)         #    1.000 CPUs utilized
                 4      context-switches          #    0.002 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               756      page-faults               #    0.298 K/sec
     9,356,138,785      cycles                    #    3.687 GHz                      (30.48%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    24,727,249,626      instructions              #    2.64  insns per cycle          (38.20%)
     4,948,329,824      branches                  # 1950.172 M/sec                    (38.20%)
            23,307      branch-misses             #    0.00% of all branches          (38.35%)
     1,759,461,670      L1-dcache-loads           #  693.416 M/sec                    (38.88%)
       654,410,524      L1-dcache-load-misses     #   37.19% of all L1-dcache hits    (38.94%)
        39,766,013      LLC-loads                 #   15.672 M/sec                    (31.10%)
        30,325,538      LLC-load-misses           #  152.52% of all LL-cache hits     (31.05%)
   <not supported>      L1-icache-loads
           189,102      L1-icache-load-misses     #    0.075 M/sec                    (31.00%)
     1,783,276,624      dTLB-loads                #  702.802 M/sec                    (30.95%)
               634      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.90%)
               250      iTLB-loads                #    0.099 K/sec                    (30.76%)
                42      iTLB-load-misses          #   16.80% of all iTLB cache hits   (30.60%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       2.537501039 seconds time elapsed

./a.out 4

 Performance counter stats for './a.out 4':

       2653.688467      task-clock (msec)         #    1.000 CPUs utilized
                 4      context-switches          #    0.002 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               755      page-faults               #    0.285 K/sec
     9,273,553,579      cycles                    #    3.495 GHz                      (30.81%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    24,463,594,085      instructions              #    2.64  insns per cycle          (38.50%)
     4,833,049,097      branches                  # 1821.257 M/sec                    (38.50%)
            38,561      branch-misses             #    0.00% of all branches          (38.50%)
     1,669,552,121      L1-dcache-loads           #  629.144 M/sec                    (38.50%)
       621,956,935      L1-dcache-load-misses     #   37.25% of all L1-dcache hits    (38.47%)
        54,474,331      LLC-loads                 #   20.528 M/sec                    (31.06%)
        43,355,855      LLC-load-misses           #  159.18% of all LL-cache hits     (31.01%)
   <not supported>      L1-icache-loads
           246,031      L1-icache-load-misses     #    0.093 M/sec                    (30.96%)
     1,677,351,049      dTLB-loads                #  632.083 M/sec                    (30.92%)
               515      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.87%)
               483      iTLB-loads                #    0.182 K/sec                    (30.82%)
                 3      iTLB-load-misses          #    0.62% of all iTLB cache hits   (30.78%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       2.653860646 seconds time elapsed

./a.out 5

 Performance counter stats for './a.out 5':

       2681.594573      task-clock (msec)         #    1.000 CPUs utilized
                 2      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               753      page-faults               #    0.281 K/sec
     9,783,738,826      cycles                    #    3.648 GHz                      (30.81%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    24,068,897,294      instructions              #    2.46  insns per cycle          (38.59%)
     4,699,861,998      branches                  # 1752.637 M/sec                    (38.68%)
            25,116      branch-misses             #    0.00% of all branches          (38.77%)
     1,560,676,047      L1-dcache-loads           #  581.996 M/sec                    (38.86%)
       572,066,365      L1-dcache-load-misses     #   36.66% of all L1-dcache hits    (38.90%)
        63,843,875      LLC-loads                 #   23.808 M/sec                    (31.07%)
        50,944,021      LLC-load-misses           #  159.59% of all LL-cache hits     (31.03%)
   <not supported>      L1-icache-loads
           204,683      L1-icache-load-misses     #    0.076 M/sec                    (30.88%)
     1,578,661,520      dTLB-loads                #  588.703 M/sec                    (30.73%)
               814      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.58%)
               292      iTLB-loads                #    0.109 K/sec                    (30.43%)
                16      iTLB-load-misses          #    5.48% of all iTLB cache hits   (30.43%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       2.681733196 seconds time elapsed

./a.out 6

 Performance counter stats for './a.out 6':

       3095.983614      task-clock (msec)         #    1.000 CPUs utilized
                 3      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               753      page-faults               #    0.243 K/sec
    11,037,737,838      cycles                    #    3.565 GHz                      (30.86%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    23,859,344,685      instructions              #    2.16  insns per cycle          (38.62%)
     4,597,358,232      branches                  # 1484.943 M/sec                    (38.70%)
            27,811      branch-misses             #    0.00% of all branches          (38.78%)
     1,457,689,889      L1-dcache-loads           #  470.833 M/sec                    (38.86%)
       476,795,783      L1-dcache-load-misses     #   32.71% of all L1-dcache hits    (38.81%)
        63,475,064      LLC-loads                 #   20.502 M/sec                    (31.01%)
        58,835,138      LLC-load-misses           #  185.38% of all LL-cache hits     (30.88%)
   <not supported>      L1-icache-loads
           293,526      L1-icache-load-misses     #    0.095 M/sec                    (30.76%)
     1,477,587,652      dTLB-loads                #  477.260 M/sec                    (30.63%)
             1,026      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.50%)
             3,463      iTLB-loads                #    0.001 M/sec                    (30.49%)
                 6      iTLB-load-misses          #    0.17% of all iTLB cache hits   (30.77%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       3.096129522 seconds time elapsed

./a.out 7

 Performance counter stats for './a.out 7':

       3572.644866      task-clock (msec)         #    1.000 CPUs utilized
                 3      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               754      page-faults               #    0.211 K/sec
    12,181,180,947      cycles                    #    3.410 GHz                      (30.56%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    23,623,060,110      instructions              #    1.94  insns per cycle          (38.59%)
     4,486,955,892      branches                  # 1255.920 M/sec                    (38.66%)
            28,584      branch-misses             #    0.00% of all branches          (38.73%)
     1,353,500,319      L1-dcache-loads           #  378.851 M/sec                    (38.80%)
       407,229,242      L1-dcache-load-misses     #   30.09% of all L1-dcache hits    (38.77%)
        81,939,252      LLC-loads                 #   22.935 M/sec                    (30.98%)
        81,433,098      LLC-load-misses           #  198.76% of all LL-cache hits     (30.95%)
   <not supported>      L1-icache-loads
           290,817      L1-icache-load-misses     #    0.081 M/sec                    (30.91%)
     1,370,395,850      dTLB-loads                #  383.580 M/sec                    (30.82%)
               755      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.71%)
             1,023      iTLB-loads                #    0.286 K/sec                    (30.60%)
                72      iTLB-load-misses          #    7.04% of all iTLB cache hits   (30.49%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       3.572793663 seconds time elapsed

./a.out 8

 Performance counter stats for './a.out 8':

       7913.908840      task-clock (msec)         #    1.000 CPUs utilized
                 4      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               754      page-faults               #    0.095 K/sec
    18,954,643,011      cycles                    #    2.395 GHz                      (30.79%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    23,582,079,752      instructions              #    1.24  insns per cycle          (38.51%)
     4,421,746,574      branches                  #  558.731 M/sec                    (38.54%)
            42,481      branch-misses             #    0.00% of all branches          (38.54%)
     1,263,967,657      L1-dcache-loads           #  159.715 M/sec                    (38.54%)
       231,031,943      L1-dcache-load-misses     #   18.28% of all L1-dcache hits    (38.51%)
       151,673,082      LLC-loads                 #   19.165 M/sec                    (30.74%)
       147,241,641      LLC-load-misses           #  194.16% of all LL-cache hits     (30.85%)
   <not supported>      L1-icache-loads
           603,917      L1-icache-load-misses     #    0.076 M/sec                    (30.84%)
     1,260,088,327      dTLB-loads                #  159.225 M/sec                    (30.82%)
               730      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.81%)
               727      iTLB-loads                #    0.092 K/sec                    (30.79%)
             1,615      iTLB-load-misses          #  222.15% of all iTLB cache hits   (30.78%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       7.913997921 seconds time elapsed

./a.out 9

 Performance counter stats for './a.out 9':

       3641.973232      task-clock (msec)         #    1.000 CPUs utilized
                 3      context-switches          #    0.001 K/sec
                 0      cpu-migrations            #    0.000 K/sec
               752      page-faults               #    0.206 K/sec
    13,417,059,445      cycles                    #    3.684 GHz                      (30.81%)
   <not supported>      stalled-cycles-frontend
   <not supported>      stalled-cycles-backend
    23,405,171,543      instructions              #    1.74  insns per cycle          (38.49%)
     4,313,153,885      branches                  # 1184.290 M/sec                    (38.49%)
            31,343      branch-misses             #    0.00% of all branches          (38.49%)
     1,154,790,590      L1-dcache-loads           #  317.078 M/sec                    (38.49%)
       131,470,216      L1-dcache-load-misses     #   11.38% of all L1-dcache hits    (38.47%)
        11,868,351      LLC-loads                 #    3.259 M/sec                    (30.98%)
        11,264,153      LLC-load-misses           #  189.82% of all LL-cache hits     (30.95%)
   <not supported>      L1-icache-loads
           322,708      L1-icache-load-misses     #    0.089 M/sec                    (30.91%)
     1,150,387,637      dTLB-loads                #  315.869 M/sec                    (30.88%)
             1,060      dTLB-load-misses          #    0.00% of all dTLB cache hits   (30.85%)
               406      iTLB-loads                #    0.111 K/sec                    (30.81%)
                10      iTLB-load-misses          #    2.46% of all iTLB cache hits   (30.78%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       3.642148586 seconds time elapsed

LLC で grep してみる(last level cache かと思ったら、naruse さんによると longest latency cache らしい)。

        27,388,843      LLC-loads                 #   11.768 M/sec                    (31.10%)
        18,668,574      LLC-load-misses           #  136.32% of all LL-cache hits     (31.05%)
        30,044,469      LLC-loads                 #   12.119 M/sec                    (31.12%)
        21,559,455      LLC-load-misses           #  143.52% of all LL-cache hits     (31.07%)
        39,766,013      LLC-loads                 #   15.672 M/sec                    (31.10%)
        30,325,538      LLC-load-misses           #  152.52% of all LL-cache hits     (31.05%)
        54,474,331      LLC-loads                 #   20.528 M/sec                    (31.06%)
        43,355,855      LLC-load-misses           #  159.18% of all LL-cache hits     (31.01%)
        63,843,875      LLC-loads                 #   23.808 M/sec                    (31.07%)
        50,944,021      LLC-load-misses           #  159.59% of all LL-cache hits     (31.03%)
        63,475,064      LLC-loads                 #   20.502 M/sec                    (31.01%)
        58,835,138      LLC-load-misses           #  185.38% of all LL-cache hits     (30.88%)
        81,939,252      LLC-loads                 #   22.935 M/sec                    (30.98%)
        81,433,098      LLC-load-misses           #  198.76% of all LL-cache hits     (30.95%)
       151,673,082      LLC-loads                 #   19.165 M/sec                    (30.74%)
       147,241,641      LLC-load-misses           #  194.16% of all LL-cache hits     (30.85%)
        11,868,351      LLC-loads                 #    3.259 M/sec                    (30.98%)
        11,264,153      LLC-load-misses           #  189.82% of all LL-cache hits     (30.95%)

どう見ても、LLC-load が増えてる。ミスも増えてる(ほとんどミスする)。

さて、これは一体なんでか。LLC の prefetch じゃないか、というのが仮説。


「インテル® 64 アーキテクチャーおよびIA-32 アーキテクチャー最適化リファレンス・マニュアル」を見ると、

ストリーマー:昇順および降順のアドレスシーケンスに対して、L1 キャッシュからの読み込み要求を監視する。監視される読み込み要求には、ロード操作とストア操作およびハードウェア・プリフェッチによって開始されたL1 D キャッシュ要求、およびコードフェッチに対するL1 命令キャッシュ要求が含まれる。前方または後方の要求ストリームが検出されると、予想されるキャッシュラインがプリフェッチされる。プリフェッチされるキャッシュラインは同じ4Kページになければならない。

とあるので、これが働くかどうかが勝負の分かれ目っぽい。


というわけで、とりあえずうまくいかないことがわかりました。

Log

2002 01 02 03 04 05 06 07 08 09 10 11 12
2003 01 02 03 04 05 06 07 08 09 10 11 12
2004 01 02 03 04 05 06 07 08 09 10 11 12
2005 01 02 03 04 05 06 07 08 09 10 11 12
2006 01 02 03 04 05 06 07 08 09 10 11 12
2007 01 02 03 04 05 06 07 08 09 10 11 12
2008 01 02 03 04 05 06 07 08 09 10 11 12
2009 01 02 03 04 05 06 07 08 09 10 11 12
2010 01 02 03 04 05 06 07 08 09 10 11 12
2011 01 02 03 04 05 06 07 08 09 10 11 12
2012 01 02 03 04 05 06 07 08 09 10 11 12
2013 01 02 03 04 05 06 07 08 09 10 11 12
2014 01 02 03 04 05 06 07 08 09 10 11 12
2015 01 02 03 04 05 06 07 08 09 10 11 12
2016 01 02 03 04 05 06 07 08 09 10 11 12
2017 01 02 03 04 05 06 07 08 09 10 11 12

SASADA Koichi (ko1 at atdot dot net) / Skype ID: ko1_ssd


rss