7 #include <sys/resource.h>
11 #include <sys/types.h>
14 #include <libkern/OSAtomic.h>
17 #include <CoreFoundation/CoreFoundation.h>
19 #include <IOKit/IOKitLib.h>
/* Value of io_mode that selects random (rather than sequential) file offsets. */
23 #define IO_MODE_RANDOM 1
/* Workload types accepted by -f: read-only, write-only, or a per-call random pick of the two. */
25 #define WORKLOAD_TYPE_RO 0
26 #define WORKLOAD_TYPE_WO 1
27 #define WORKLOAD_TYPE_RW 2
/* Upper bound for -t; also the size of main()'s thread_list array. */
29 #define MAX_THREADS 1000
/* Buffer size used for test file names and histogram row labels. */
30 #define MAX_FILENAME 64
/* Capacity of throughput_histogram (one slot per throughput sample). */
31 #define MAX_ITERATIONS 10000
/* Latency histogram geometry: bin width in usecs and number of bins; the last bin is open-ended. */
32 #define LATENCY_BIN_SIZE 1000
33 #define LATENCY_BINS 31
34 #define LOW_LATENCY_BIN_SIZE 50
35 #define LOW_LATENCY_BINS 21
/* Throughput sampling period, in msecs. */
36 #define THROUGHPUT_INTERVAL 5000
/* Default test file size in 4096-byte pages (262144 pages = 1 GiB). */
37 #define DEFAULT_FILE_SIZE (262144)
38 #define BLOCKSIZE 1024
/* Buffer size for the dd command line built in main(). */
39 #define MAX_CMD_SIZE 256
/* Clears the low 12 bits, i.e. rounds an offset down to a 4096-byte boundary. */
40 #define PG_MASK ~(0xFFF)
/* IOKit class names probed by setup_qos_device(). */
41 #define kIONVMeANS2ControllerString "AppleANS2Controller"
42 #define kIONVMeANS2EmbeddedControllerString "AppleANS2NVMeController"
43 #define kIONVMeControllerString "AppleNVMeController"
/* Test parameters. Each has a default here and can be overridden by a command-line option in main(). */
51 int burst_count
= 10; /* Unit: Number ; Desc.: I/O Burst Count */
52 int inter_burst_duration
= 0; /* Unit: msecs ; Desc.: I/O Inter-Burst Duration (-1: Random value [0,100]) */
53 int inter_io_delay_ms
= 0; /* Unit: msecs ; Desc.: Inter I/O Delay */
54 int thread_count
= 1; /* Unit: Number ; Desc.: Thread Count */
55 int workload_type
= WORKLOAD_TYPE_RO
; /* Unit: 0/1/2 ; Desc.: Workload Type */
56 int io_size
= 4096; /* Unit: Bytes ; Desc.: I/O Unit Size */
57 int sync_frequency_ms
= 0; /* Unit: msecs ; Desc.: Sync thread frequency (0: Indicates no sync) */
58 int io_mode
= 0; /* Unit: 0/1 ; Desc.: I/O Mode (Seq./Rand.) */
59 int test_duration
= 0; /* Unit: secs ; Desc.: Total Test Duration (0 indicates wait for Ctrl+C signal) */
60 int io_tier
= 0; /* Unit: 0/1/2/3; Desc.: I/O Tier */
61 int file_size
= DEFAULT_FILE_SIZE
; /* Unit: pages ; Desc.: File Size in 4096 byte blocks */
62 int cached_io_flag
= 0; /* Unit: 0/1 ; Desc.: I/O Caching behavior (no-cached/cached) */
63 int io_qos_timeout_ms
= 0; /* Unit: msecs ; Desc.: I/O QOS timeout */
/* Set to 1 by the -n option; when set, every thread uses the user-supplied file instead of a generated one. */
65 int user_specified_file
= 0;
/* Controller kind detected by setup_qos_device(); selects the register written by assertASP(). */
66 qos_device_type_t qos_device
= 0;
/* Aggregate counters shared by all I/O threads. io_routine() updates them with OSAtomic* calls,
 * except max_io_time, which it updates with a plain compare-and-assign. Latencies are in usecs. */
68 int64_t total_io_count
= 0;
69 int64_t total_io_size
= 0;
70 int64_t total_io_time
= 0;
71 int64_t max_io_time
= 0;
72 int64_t total_burst_count
= 0;
/* Histograms: per-I/O latency, per-burst average latency, and a finer-grained per-I/O latency view. */
73 int64_t latency_histogram
[LATENCY_BINS
];
74 int64_t burst_latency_histogram
[LATENCY_BINS
];
75 int64_t low_latency_histogram
[LOW_LATENCY_BINS
];
/* Bytes transferred in each THROUGHPUT_INTERVAL window; written by calculate_throughput(). */
76 int64_t throughput_histogram
[MAX_ITERATIONS
];
77 int64_t throughput_index
;
/* QoS watchdog timer created by start_qos_timer() and released by stop_qos_timer(). */
78 CFRunLoopTimerRef runLoopTimer
= NULL
;
/* Forward declarations for the helpers, thread routines and QoS-timer functions used in this file. */
80 void print_usage(void);
81 void print_data_percentage(double percent
);
82 void print_stats(void);
83 unsigned int find_io_bin(int64_t latency
, int latency_bin_size
, int latency_bins
);
84 void signalHandler(int sig
);
/* Signature matches a CFRunLoopTimer callback; start_qos_timer() installs it as one. */
85 void assertASP(CFRunLoopTimerRef timer
, void *info
);
86 void start_qos_timer(void);
87 void stop_qos_timer(void);
88 void perform_io(int fd
, char *buf
, int size
, int type
);
/* pthread entry points. */
89 void *sync_routine(void *arg
);
90 void *calculate_throughput(void *arg
);
91 void *io_routine(void *arg
);
92 void validate_option(int value
, int min
, int max
, char *option
, char *units
);
93 void print_test_setup(int value
, char *option
, char *units
, char *comment
);
94 void setup_process_io_policy(int io_tier
);
95 void setup_qos_device(void);
96 void print_latency_histogram(int64_t *data
, int latency_bins
, int latency_bin_size
, double io_count
);
97 int system_cmd(char *command
);
/* Help text: one line per option letter accepted by the getopt() string in main(). */
102 printf("Usage: ./iosim [options]\n");
103 printf("Options:\n");
104 printf("-c: (number) Burst Count. No. of I/Os performed in an I/O burst\n");
105 printf("-i: (msecs) Inter Burst Duration. Amount of time the thread sleeps between bursts (-1 indicates random durations between 0-100 msecs)\n");
106 printf("-d: (msecs) Inter I/O delay. Amount of time between issuing I/Os\n");
107 printf("-t: (number) Thread count\n");
108 printf("-f: (0/1/2 : Read-Only/Write-Only/Mixed RW) Workload Type\n");
109 printf("-m: (0/1 : Sequential/Random) I/O pattern\n");
110 printf("-j: (number) Size of I/O in bytes\n");
111 printf("-s: (msecs) Frequency of sync() calls\n");
112 printf("-x: (secs) Test duration (0 indicates that the tool would wait for a Ctrl-C)\n");
113 printf("-l: (0/1/2/3) I/O Tier\n");
114 printf("-z: (number) File Size in pages (1 page = 4096 bytes) \n");
115 printf("-n: (string) File name used for tests (the tool would create files if this option is not specified)\n");
116 printf("-a: (0/1 : Non-cached/Cached) I/O Caching behavior\n");
117 printf("-q: (msecs) I/O QoS timeout. Time of I/O before drive assert and system panic\n");
/*
 * Render a percentage as a 20-cell text bar: the value is rounded to the
 * nearest 5% to get `count` cells, and the remaining `20 - count` cells are
 * handled by the second countdown loop.
 * NOTE(review): the loop bodies are not visible here; presumably the first
 * prints the filled cells and the second the padding — confirm.
 * NOTE(review): a percent above 100 makes `spaces` negative (padding loop is
 * skipped); a NaN input makes the int conversion undefined.
 */
120 void print_data_percentage(double percent
)
122 int count
= (int)(round(percent
/ 5.0));
123 int spaces
= 20 - count
;
125 for(; count
> 0; count
--)
127 for(; spaces
> 0; spaces
--)
132 void print_latency_histogram(int64_t *data
, int latency_bins
, int latency_bin_size
, double io_count
)
135 char label
[MAX_FILENAME
];
138 for (i
= 0; i
< latency_bins
; i
++) {
139 if (i
== (latency_bins
- 1))
140 snprintf(label
, MAX_FILENAME
, "> %d usecs", i
* latency_bin_size
);
142 snprintf(label
, MAX_FILENAME
, "%d - %d usecs", i
* latency_bin_size
, (i
+1) * latency_bin_size
);
143 printf("%25s ", label
);
144 percentage
= ((double)data
[i
] * 100.000000) / io_count
;
145 print_data_percentage(percentage
);
146 printf(" %.6lf%%\n", percentage
);
/*
 * Final report: totals, average/max latency, the three latency histograms
 * and a per-interval throughput timeline scaled against the busiest interval.
 */
155 char label
[MAX_FILENAME
];
157 printf("I/O Statistics:\n");
159 printf("Total I/Os : %lld\n", total_io_count
);
/* NOTE(review): divides by total_io_count; prints nan/inf if no I/O completed. */
160 printf("Avg. Latency : %.2lf usecs\n", ((double)total_io_time
) / ((double)total_io_count
));
161 printf("Max. Latency : %.2lf usecs\n", ((double)max_io_time
));
163 printf("Low Latency Histogram: \n");
164 print_latency_histogram(low_latency_histogram
, LOW_LATENCY_BINS
, LOW_LATENCY_BIN_SIZE
, (double)total_io_count
);
165 printf("Latency Histogram: \n");
166 print_latency_histogram(latency_histogram
, LATENCY_BINS
, LATENCY_BIN_SIZE
, (double)total_io_count
);
/* Burst histogram is normalised by the number of bursts, not the number of I/Os. */
167 printf("Burst Avg. Latency Histogram: \n");
168 print_latency_histogram(burst_latency_histogram
, LATENCY_BINS
, LATENCY_BIN_SIZE
, (double)total_burst_count
);
170 printf("Throughput Timeline: \n");
/* First pass: find the largest interval so the bars can be scaled relative to it. */
172 int64_t max_throughput
= 0;
173 for (i
= 0; i
< throughput_index
; i
++) {
174 if (max_throughput
< throughput_histogram
[i
])
175 max_throughput
= throughput_histogram
[i
];
/* Second pass: one row per interval, labelled with the interval's end time. */
178 for (i
= 0; i
< throughput_index
; i
++) {
179 snprintf(label
, MAX_FILENAME
, "T=%d msecs", (i
+1) * THROUGHPUT_INTERVAL
);
180 printf("%25s ", label
);
/* NOTE(review): if every interval moved 0 bytes, max_throughput is 0 and this is 0/0. */
181 percentage
= ((double)throughput_histogram
[i
] * 100) / (double)max_throughput
;
/* NOTE(review): the (int) cast truncates before the callee rounds; the latency histograms pass the double unmodified. */
182 print_data_percentage((int)percentage
);
/* Bytes per interval converted to MiB, divided by the interval length in seconds. */
183 printf("%.2lf MBps\n", ((double)throughput_histogram
[i
] / 1048576.0) / ((double)THROUGHPUT_INTERVAL
/ 1000.0));
/*
 * Map a latency sample to its histogram bin index.
 *
 * latency          - sample value (usecs in this tool)
 * latency_bin_size - width of each bin, same unit as latency
 * latency_bins     - number of bins in the histogram
 *
 * Returns an index in [0, latency_bins - 1]; samples beyond the last bin's
 * lower bound are clamped into the last (open-ended) bin.
 *
 * The result is always a valid array index:
 *  - negative samples (the caller derives latency from gettimeofday(), which
 *    can step backwards) land in bin 0 instead of wrapping to a huge
 *    unsigned value;
 *  - the clamp happens in 64-bit arithmetic, so very large samples are not
 *    truncated by a narrowing cast before the comparison;
 *  - a non-positive bin size or bin count yields 0 rather than dividing by
 *    zero or returning latency_bins - 1 < 0.
 */
unsigned int find_io_bin(int64_t latency, int latency_bin_size, int latency_bins)
{
	int64_t bin;

	if (latency_bin_size <= 0 || latency_bins <= 0)
		return 0;

	if (latency < 0)
		return 0;

	bin = latency / latency_bin_size;
	if (bin >= latency_bins)
		bin = latency_bins - 1;

	return (unsigned int)bin;
}
/*
 * Signal handler that main() installs for SIGINT and SIGALRM.
 * NOTE(review): the body is not visible here; presumably it prints the
 * statistics and exits — confirm.
 */
196 void signalHandler(int sig
)
/*
 * Detect which NVMe controller class is present and record it in qos_device.
 * Probes, in order, AppleANS2Controller, AppleANS2NVMeController and
 * AppleNVMeController; the two ANS2 classes both map to kNVMeDeviceANS2.
 * If none is found, qos_device becomes kDefaultDevice.
 * The probe is only relevant when a QoS timeout was requested
 * (io_qos_timeout_ms > 0).
 * NOTE(review): the return statements between probes are not visible here;
 * presumably each successful match returns early — confirm.
 * NOTE(review): IOServiceGetMatchingServices may return a valid but empty
 * iterator, in which case `iterator != IO_OBJECT_NULL` would not prove a
 * device exists; checking IOIteratorNext() would be stricter — confirm.
 * NOTE(review): no IOObjectRelease(iterator) is visible — possible leak.
 */
203 void setup_qos_device(void)
205 kern_return_t status
= kIOReturnError
;
206 io_iterator_t iterator
= IO_OBJECT_NULL
;
208 if(io_qos_timeout_ms
<= 0)
211 printf ( "*** setup_qos_device *** \n" );
/* Probe 1: ANS2 controller. */
213 status
= IOServiceGetMatchingServices ( kIOMasterPortDefault
, IOServiceMatching ( kIONVMeANS2ControllerString
), &iterator
);
215 if ( status
!= kIOReturnSuccess
)
218 if ( iterator
!= IO_OBJECT_NULL
) {
219 printf ( "Found NVMe ANS2 Device \n" );
220 qos_device
= kNVMeDeviceANS2
;
/* Probe 2: embedded ANS2 controller (treated the same as ANS2). */
224 status
= IOServiceGetMatchingServices ( kIOMasterPortDefault
, IOServiceMatching ( kIONVMeANS2EmbeddedControllerString
), &iterator
);
226 if ( status
!= kIOReturnSuccess
)
229 if ( iterator
!= IO_OBJECT_NULL
) {
230 printf ( "Found NVMe ANS2 Embedded Device \n" );
231 qos_device
= kNVMeDeviceANS2
;
/* Probe 3: generic NVMe controller. */
235 status
= IOServiceGetMatchingServices ( kIOMasterPortDefault
, IOServiceMatching ( kIONVMeControllerString
), &iterator
);
237 if ( status
!= kIOReturnSuccess
)
240 if ( iterator
!= IO_OBJECT_NULL
) {
241 printf ( "Found NVMe Device \n" );
242 qos_device
= kNVMeDevice
;
/* Fallback: no known controller; assertASP() checks for this value first. */
246 printf ( "NVMe Device not found, not setting qos timeout\n" );
247 qos_device
= kDefaultDevice
;
/*
 * Run-loop timer callback installed by start_qos_timer().
 * Reports the timeout, builds an nvmectl-tool.py register-write command whose
 * register offset depends on the detected controller (0x13EC for ANS2, 0x550
 * for plain NVMe), prints it, and then panics the system.
 * NOTE(review): presumably returns early when qos_device == kDefaultDevice
 * and runs the command through system_cmd(); those lines are not visible
 * here — confirm.
 * NOTE(review): if qos_device matches neither branch and there is no hidden
 * else, `command` would be printed uninitialised — confirm.
 */
251 void assertASP(CFRunLoopTimerRef timer
, void *info
)
253 char command
[ 1024 ];
255 if(qos_device
== kDefaultDevice
)
258 printf("assertASP. Timeout of IO exceeds = %d msec\n", io_qos_timeout_ms
);
260 // kNVMe_ANS2_Force_Assert_offset = 0x13EC, // GP59
261 // kNVMe_Force_Assert_Offset = 0x550,
263 if(qos_device
== kNVMeDeviceANS2
)
264 snprintf ( command
, sizeof ( command
), "/usr/local/bin/nvmectl-tool.py -a WriteRegister32 $((0x13EC)) 0xFFFF" );
265 else if(qos_device
== kNVMeDevice
)
266 snprintf ( command
, sizeof ( command
), "/usr/local/bin/nvmectl-tool.py -a WriteRegister32 $((0x550)) 0xFFFF" );
271 printf("Command : %s\n", command
);
274 // Panic the system as well
275 panic("IO time > QoS timeout");
/*
 * Arm the QoS watchdog: create a one-shot CFRunLoopTimer (interval 0) that
 * fires io_qos_timeout_ms from now, calls assertASP(), and is attached to the
 * main run loop. Only applies when io_qos_timeout_ms > 0.
 * The timer is stored in the global runLoopTimer; a previous value is
 * overwritten without being released, so each start must be paired with
 * stop_qos_timer().
 * NOTE(review): the result of CFRunLoopTimerCreate is not checked before
 * being passed to CFRunLoopAddTimer.
 */
280 void start_qos_timer(void)
284 if(io_qos_timeout_ms
<= 0)
/* Milliseconds to (fractional) seconds. */
287 timeout_sec
= (float)io_qos_timeout_ms
/1000;
289 // Schedule a "timeout" delayed task that checks IO's which take > timeout sec to complete
290 runLoopTimer
= CFRunLoopTimerCreate(NULL
, CFAbsoluteTimeGetCurrent()+timeout_sec
, 0, 0, 0, assertASP
, NULL
);
291 CFRunLoopAddTimer(CFRunLoopGetMain(), runLoopTimer
, kCFRunLoopDefaultMode
);
294 void stop_qos_timer(void)
296 if(runLoopTimer
== NULL
)
299 CFRunLoopTimerInvalidate(runLoopTimer
);
300 CFRunLoopRemoveTimer(CFRunLoopGetMain(), runLoopTimer
, kCFRunLoopDefaultMode
);
301 CFRelease(runLoopTimer
);
/*
 * Issue a read or write of `size` bytes from/to `buf` on `fd`.
 * For WORKLOAD_TYPE_RW the direction is chosen at random once per call.
 * NOTE(review): the surrounding loop and the conditions guarding the lseek()
 * and error branches are not visible here; presumably the offset is rewound
 * to 0 when read/write returns 0 (end of file) and the call is retried —
 * confirm.
 * NOTE(review): the perror() messages end in "\n", so the appended
 * ": <error text>" lands on the following line.
 */
304 void perform_io(int fd
, char *buf
, int size
, int type
)
/* Mixed workload: pick read or write with equal probability. */
308 if (type
== WORKLOAD_TYPE_RW
)
309 type
= (rand() % 2) ? WORKLOAD_TYPE_WO
: WORKLOAD_TYPE_RO
;
313 if (type
== WORKLOAD_TYPE_RO
)
314 ret
= read(fd
, buf
, size
);
316 ret
= write(fd
, buf
, size
);
319 if (lseek(fd
, 0, SEEK_SET
) < 0) {
320 perror("lseek() to reset file offset to zero failed!\n");
326 perror("read/write syscall failed!\n");
/*
 * Thread routine started by main() when sync_frequency_ms is non-zero.
 * Sleeps sync_frequency_ms between iterations.
 * NOTE(review): the loop and the sync call itself are not visible here;
 * presumably it calls sync() after each sleep — confirm.
 */
339 void *sync_routine(void *arg
)
342 usleep(sync_frequency_ms
* 1000);
/*
 * Thread routine that samples throughput: after each THROUGHPUT_INTERVAL
 * msecs it stores the number of bytes added to total_io_size since the
 * previous sample into throughput_histogram[throughput_index].
 * NOTE(review): the loop and the increment of throughput_index are not
 * visible here; no bound check against MAX_ITERATIONS is visible either, so
 * a long enough run could write past the array — confirm.
 * NOTE(review): total_io_size is read twice per sample (once for the delta,
 * once for the new baseline); bytes added between the two reads are dropped.
 */
348 void *calculate_throughput(void *arg
)
350 int64_t prev_total_io_size
= 0;
354 usleep(THROUGHPUT_INTERVAL
* 1000);
355 size
= total_io_size
- prev_total_io_size
;
356 throughput_histogram
[throughput_index
] = size
;
357 prev_total_io_size
= total_io_size
;
/*
 * I/O worker thread. `arg` carries the thread index by value.
 * Opens its test file (the user-supplied one, or "iosim-<pid>-<index>"),
 * checks it is at least io_size bytes, allocates a zeroed I/O buffer, then
 * issues bursts of burst_count I/Os, timing each one with gettimeofday() and
 * feeding the shared counters and histograms.
 * NOTE(review): the outer loop, the error exits and the initialisation of
 * burst_elapsed are not visible here.
 */
363 void *io_routine(void *arg
)
365 struct timeval start_tv
;
366 struct timeval end_tv
;
368 int64_t burst_elapsed
;
370 char test_filename
[MAX_FILENAME
];
371 struct stat filestat
;
372 int i
, fd
, io_thread_id
;
/* The thread index was passed by value inside the pointer; this cast narrows a pointer to int. */
374 io_thread_id
= (int)arg
;
375 if (user_specified_file
)
376 strlcpy(test_filename
, user_fname
, MAX_FILENAME
);
378 snprintf(test_filename
, MAX_FILENAME
, "iosim-%d-%d", (int)getpid(), io_thread_id
);
/* No O_CREAT is passed, so the permission-bits argument has no effect; the file must already exist. */
380 if (0 > (fd
= open(test_filename
, O_RDWR
, S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IROTH
))) {
381 printf("Error opening file %s!\n", test_filename
);
385 if (fstat(fd
, &filestat
) < 0) {
386 printf("Error stat()ing file %s!\n", test_filename
);
390 if (filestat
.st_size
< io_size
) {
391 printf("%s: File size (%lld) smaller than I/O size (%d)!\n", test_filename
, filestat
.st_size
, io_size
);
/* NOTE(review): presumably applied only when cached_io_flag is 0; the guarding line is not visible — confirm. */
396 fcntl(fd
, F_NOCACHE
, 1);
/* Read-ahead is always turned off (main() reports this in the setup table). */
398 fcntl(fd
, F_RDAHEAD
, 0);
400 if(!(data
= (char *)calloc(io_size
, 1))) {
401 perror("Error allocating buffers for I/O!\n");
/* Redundant: calloc() already zero-filled the buffer. */
404 memset(data
, '\0', io_size
);
/* One burst: burst_count back-to-back I/Os. */
409 for(i
= 0; i
< burst_count
; i
++) {
410 if (io_mode
== IO_MODE_RANDOM
) {
/* Random page-aligned offset that leaves room for one io_size transfer.
 * NOTE(review): when st_size == io_size the modulus is 0 (division by zero);
 * the size check above only rejects st_size < io_size. */
411 if (lseek(fd
, (rand() % (filestat
.st_size
- io_size
)) & PG_MASK
, SEEK_SET
) < 0) {
412 perror("Error lseek()ing to random location in file!\n");
419 gettimeofday(&start_tv
, NULL
);
420 perform_io(fd
, data
, io_size
, workload_type
);
421 gettimeofday(&end_tv
, NULL
);
425 OSAtomicIncrement64(&total_io_count
);
426 OSAtomicAdd64(io_size
, &total_io_size
);
/* Elapsed wall-clock time in usecs; can be negative if the system clock is stepped back. */
427 elapsed
= ((end_tv
.tv_sec
- start_tv
.tv_sec
) * 1000000) + (end_tv
.tv_usec
- start_tv
.tv_usec
);
/* NOTE(review): unlike the other counters this check-then-set is not atomic, so concurrent threads can lose a maximum. */
429 if (elapsed
> max_io_time
) {
430 max_io_time
= elapsed
;
433 OSAtomicAdd64(elapsed
, &total_io_time
);
434 OSAtomicIncrement64(&(latency_histogram
[find_io_bin(elapsed
, LATENCY_BIN_SIZE
, LATENCY_BINS
)]));
435 OSAtomicIncrement64(&(low_latency_histogram
[find_io_bin(elapsed
, LOW_LATENCY_BIN_SIZE
, LOW_LATENCY_BINS
)]));
436 burst_elapsed
+= elapsed
;
438 if (inter_io_delay_ms
)
439 usleep(inter_io_delay_ms
* 1000);
/* Average latency of the burst. NOTE(review): divides by zero if burst_count is 0, which main() accepts for -c. */
442 burst_elapsed
/= burst_count
;
443 OSAtomicIncrement64(&(burst_latency_histogram
[find_io_bin(burst_elapsed
, LATENCY_BIN_SIZE
, LATENCY_BINS
)]));
444 OSAtomicIncrement64(&total_burst_count
);
/* Pause between bursts: a random 0-99 msecs when -1 was requested, otherwise the fixed duration. */
446 if(inter_burst_duration
== -1)
447 usleep((rand() % 100) * 1000);
449 usleep(inter_burst_duration
* 1000);
/*
 * Check that a parsed option value lies within [min, max] (inclusive) and
 * report it otherwise. `option` and `units` are only used in the message.
 * NOTE(review): the action taken after the message is not visible here;
 * presumably the program exits — confirm.
 */
457 void validate_option(int value
, int min
, int max
, char *option
, char *units
)
459 if (value
< min
|| value
> max
) {
460 printf("Illegal option value %d for %s (Min value: %d %s, Max value: %d %s).\n", value
, option
, min
, units
, max
, units
);
/*
 * Print one row of the test-setup table: right-aligned option name, the
 * value, left-aligned units, and - when a comment is supplied - the comment
 * in parentheses. Callers pass 0 for `comment` when there is nothing to add.
 */
void print_test_setup(int value, char *option, char *units, char *comment)
{
	if (comment) {
		printf("%32s: %16d %-16s (%s)\n", option, value, units, comment);
		return;
	}

	printf("%32s: %16d %-16s\n", option, value, units);
}
/*
 * Apply a process-wide disk I/O policy with setiopolicy_np().
 * The four calls use IOPOL_IMPORTANT, IOPOL_STANDARD, IOPOL_UTILITY and
 * IOPOL_THROTTLE, in that order; a non-zero return leads to the error message.
 * NOTE(review): the switch/case skeleton is not visible here; presumably
 * tiers 0..3 map to those policies in the order listed — confirm.
 * NOTE(review): what happens after the error message is not visible.
 */
473 void setup_process_io_policy(int io_tier
)
478 if (setiopolicy_np(IOPOL_TYPE_DISK
, IOPOL_SCOPE_PROCESS
, IOPOL_IMPORTANT
))
482 if (setiopolicy_np(IOPOL_TYPE_DISK
, IOPOL_SCOPE_PROCESS
, IOPOL_STANDARD
))
486 if (setiopolicy_np(IOPOL_TYPE_DISK
, IOPOL_SCOPE_PROCESS
, IOPOL_UTILITY
))
490 if (setiopolicy_np(IOPOL_TYPE_DISK
, IOPOL_SCOPE_PROCESS
, IOPOL_THROTTLE
))
497 printf("Error setting process-wide I/O policy to %d\n", io_tier
);
/*
 * Entry point. Parses and validates the options, prints the test setup,
 * creates one test file per thread with dd (unless a file was supplied),
 * applies the I/O tier, installs the signal handlers, starts the I/O, sync
 * and throughput threads, optionally runs the main run loop for the QoS
 * timer, arms the test-duration alarm and joins the threads.
 * NOTE(review): the case labels of the option switch and several
 * declarations are not visible here.
 */
501 int main(int argc
, char *argv
[])
504 pthread_t thread_list
[MAX_THREADS
];
505 pthread_t sync_thread
;
506 pthread_t throughput_thread
;
507 char fname
[MAX_FILENAME
];
509 while((option
= getopt(argc
, argv
,"hc:i:d:t:f:m:j:s:x:l:z:n:a:q:")) != -1) {
/* NOTE(review): 0 is accepted, but io_routine() divides by burst_count. */
512 burst_count
= atoi(optarg
);
513 validate_option(burst_count
, 0, INT_MAX
, "Burst Count", "I/Os");
516 inter_burst_duration
= atoi(optarg
);
517 validate_option(inter_burst_duration
, -1, INT_MAX
, "Inter Burst duration", "msecs");
520 inter_io_delay_ms
= atoi(optarg
);
521 validate_option(inter_io_delay_ms
, 0, INT_MAX
, "Inter I/O Delay", "msecs");
/* NOTE(review): 0 threads is accepted, in which case no I/O is generated. */
524 thread_count
= atoi(optarg
);
525 validate_option(thread_count
, 0, MAX_THREADS
, "Thread Count", "Threads");
528 workload_type
= atoi(optarg
);
529 validate_option(workload_type
, 0, 2, "Workload Type", "");
532 io_mode
= atoi(optarg
);
533 validate_option(io_mode
, 0, 1, "I/O Mode", "");
/* NOTE(review): 0 is accepted here; any further check in the hidden lines that follow should be confirmed. */
536 io_size
= atoi(optarg
);
537 validate_option(io_size
, 0, INT_MAX
, "I/O Size", "Bytes");
543 sync_frequency_ms
= atoi(optarg
);
544 validate_option(sync_frequency_ms
, 0, INT_MAX
, "Sync. Frequency", "msecs");
547 test_duration
= atoi(optarg
);
548 validate_option(test_duration
, 0, INT_MAX
, "Test duration", "secs");
551 io_tier
= atoi(optarg
);
552 validate_option(io_tier
, 0, 3, "I/O Tier", "");
/* NOTE(review): the units string says "bytes", but file_size is a count of 4096-byte pages (see the dd command below). */
555 file_size
= atoi(optarg
);
556 validate_option(file_size
, 0, INT_MAX
, "File Size", "bytes");
560 user_specified_file
= 1;
563 cached_io_flag
= atoi(optarg
);
564 validate_option(cached_io_flag
, 0, 1, "I/Os cached/no-cached", "");
567 io_qos_timeout_ms
= atoi(optarg
);
568 validate_option(io_qos_timeout_ms
, 0, INT_MAX
, "I/O QoS timeout", "msecs");
571 printf("Unknown option %c\n", option
);
/* Echo the effective configuration. */
577 printf("***********************TEST SETUP*************************\n");
579 print_test_setup(burst_count
, "Burst Count", "I/Os", 0);
580 print_test_setup(inter_burst_duration
, "Inter Burst duration", "msecs", "-1 indicates random burst duration");
581 print_test_setup(inter_io_delay_ms
, "Inter I/O Delay", "msecs", 0);
582 print_test_setup(thread_count
, "Thread Count", "Threads", 0);
583 print_test_setup(workload_type
, "Workload Type", "", "0:R 1:W 2:RW");
584 print_test_setup(io_mode
, "I/O Mode", "", "0:Seq. 1:Rnd");
585 print_test_setup(io_size
, "I/O Size", "Bytes", 0);
586 print_test_setup(sync_frequency_ms
, "Sync. Frequency", "msecs", "0 indicates no sync. thread");
587 print_test_setup(test_duration
, "Test duration", "secs", "0 indicates tool waits for Ctrl+C");
588 print_test_setup(io_tier
, "I/O Tier", "", 0);
589 print_test_setup(cached_io_flag
, "I/O Caching", "", "0 indicates non-cached I/Os");
590 print_test_setup(io_qos_timeout_ms
, "I/O QoS Threshold Timeout", "msecs", 0);
591 print_test_setup(0, "File read-aheads", "", "0 indicates read-aheads disabled");
593 printf("**********************************************************\n");
/* No file supplied: create one file per thread, filled from /dev/urandom, named like io_routine() expects. */
595 if (user_specified_file
== 0) {
596 char dd_command
[MAX_CMD_SIZE
];
597 for (i
=0; i
< thread_count
; i
++) {
598 snprintf(fname
, MAX_FILENAME
, "iosim-%d-%d", (int)getpid(), i
);
599 snprintf(dd_command
, MAX_CMD_SIZE
, "dd if=/dev/urandom of=%s bs=4096 count=%d", fname
, file_size
);
600 printf("Creating file %s of size %lld...\n", fname
, ((int64_t)file_size
* 4096));
/* NOTE(review): the return value of system_cmd() is not checked on this line. */
601 system_cmd(dd_command
);
604 printf("Using user specified file %s for all threads...\n", user_fname
);
607 setup_process_io_policy(io_tier
);
611 printf("**********************************************************\n");
612 printf("Creating threads and generating workload...\n");
/* Both Ctrl+C and the test-duration alarm go to the same handler. */
614 signal(SIGINT
, signalHandler
);
615 signal(SIGALRM
, signalHandler
);
/* NOTE(review): pthread_create() returns 0 or a positive error number and does not set errno,
 * so the `< 0` tests below can never fire and perror() would not describe the failure.
 * NOTE(review): the int index `i` is passed where a void * is expected (io_routine casts it back). */
617 for(i
=0; i
< thread_count
; i
++) {
618 if (pthread_create(&thread_list
[i
], NULL
, io_routine
, i
) < 0) {
619 perror("Could not create I/O thread!\n");
624 if (sync_frequency_ms
) {
625 if (pthread_create(&sync_thread
, NULL
, sync_routine
, NULL
) < 0) {
626 perror("Could not create sync thread!\n");
631 if (pthread_create(&throughput_thread
, NULL
, calculate_throughput
, NULL
) < 0) {
632 perror("Could not throughput calculation thread!\n");
/* With a QoS timeout, run the main run loop (where start_qos_timer() attaches its timer) for test_duration seconds. */
636 if(io_qos_timeout_ms
> 0) {
637 CFRunLoopRunInMode(kCFRunLoopDefaultMode
, (CFTimeInterval
)test_duration
, false);
640 /* All threads are now initialized */
/* alarm(0) schedules nothing, which gives the "wait for Ctrl+C" behaviour for a zero duration. */
642 alarm(test_duration
);
645 for(i
=0; i
< thread_count
; i
++)
646 pthread_join(thread_list
[i
], NULL
);
648 if (sync_frequency_ms
)
649 pthread_join(sync_thread
, NULL
);
651 pthread_join(throughput_thread
, NULL
);
656 extern char **environ
;
658 int system_cmd(char *command
)
660 // workaround for rdar://problem/53281655
662 char *argv
[] = {"sh", "-c", command
, NULL
};
664 status
= posix_spawn(&pid
, "/bin/sh", NULL
, NULL
, argv
, environ
);
666 if (waitpid(pid
, &status
, 0) != -1) {