]> git.cameronkatri.com Git - apple_cmds.git/blob - system_cmds/gcore.tproj/sparse.c
system_cmds: Fix compilation for lower targets, downgrade lsmp
[apple_cmds.git] / system_cmds / gcore.tproj / sparse.c
1 /*
2 * Copyright (c) 2016 Apple Inc. All rights reserved.
3 */
4
5 typedef char *kobject_description_t[512];
6 #include "options.h"
7 #include "vm.h"
8 #include "region.h"
9 #include "utils.h"
10 #include "dyld.h"
11 #include "threads.h"
12 #include "sparse.h"
13 #include "vanilla.h"
14 #include "corefile.h"
15
16 #include <sys/types.h>
17 #include <sys/sysctl.h>
18 #include <sys/stat.h>
19 #include <sys/mman.h>
20 #include <libproc.h>
21
22 #include <stdio.h>
23 #include <string.h>
24 #include <strings.h>
25 #include <stdlib.h>
26 #include <stdarg.h>
27 #include <signal.h>
28 #include <unistd.h>
29 #include <errno.h>
30 #include <ctype.h>
31 #include <fcntl.h>
32 #include <assert.h>
33
34 #include <mach/mach.h>
35
36 static struct subregion *
37 new_subregion(
38 const mach_vm_offset_t vmaddr,
39 const mach_vm_offset_t vmsize,
40 const native_segment_command_t *sc,
41 const struct libent *le)
42 {
43 struct subregion *s = malloc(sizeof (*s));
44
45 assert(vmaddr != 0 && vmsize != 0);
46 assert(vmaddr < vmaddr + vmsize);
47 s->s_segcmd = *sc;
48
49 S_SETADDR(s, vmaddr);
50 S_SETSIZE(s, vmsize);
51
52 s->s_libent = le;
53 s->s_isuuidref = false;
54 return s;
55 }
56
57 static void
58 del_subregion(struct subregion *s)
59 {
60 poison(s, 0xfacefac1, sizeof (*s));
61 free(s);
62 }
63
64 static walk_return_t
65 clean_subregions(struct region *r)
66 {
67 if (r->r_nsubregions) {
68 assert(r->r_subregions);
69 for (unsigned i = 0; i < r->r_nsubregions; i++)
70 del_subregion(r->r_subregions[i]);
71 poison(r->r_subregions, 0xfac1fac1, sizeof (r->r_subregions[0]) * r->r_nsubregions);
72 free(r->r_subregions);
73 r->r_nsubregions = 0;
74 r->r_subregions = NULL;
75 } else {
76 assert(NULL == r->r_subregions);
77 }
78 return WALK_CONTINUE;
79 }
80
/*
 * Release a region together with any subregion decoration attached
 * to it.  The region memory is poisoned before being freed to make
 * use-after-free bugs conspicuous.
 */
void
del_sparse_region(struct region *r)
{
    clean_subregions(r);
    poison(r, 0xcafecaff, sizeof (*r));
    free(r);
}
88
89 #define NULLsc ((native_segment_command_t *)0)
90
91 static bool
92 issamesubregiontype(const struct subregion *s0, const struct subregion *s1) {
93 return 0 == strncmp(S_MACHO_TYPE(s0), S_MACHO_TYPE(s1), sizeof (NULLsc->segname));
94 }
95
96 bool
97 issubregiontype(const struct subregion *s, const char *sctype) {
98 return 0 == strncmp(S_MACHO_TYPE(s), sctype, sizeof (NULLsc->segname));
99 }
100
101 static void
102 elide_subregion(struct region *r, unsigned ind)
103 {
104 del_subregion(r->r_subregions[ind]);
105 for (unsigned j = ind; j < r->r_nsubregions - 1; j++)
106 r->r_subregions[j] = r->r_subregions[j+1];
107 assert(r->r_nsubregions != 0);
108 r->r_subregions[--r->r_nsubregions] = NULL;
109 }
110
/*
 * Transient singly-linked list node used to accumulate subregions
 * while walking dyld's image list, before they are copied into the
 * region's sorted subregion array and the nodes are freed.
 */
struct subregionlist {
    STAILQ_ENTRY(subregionlist) srl_linkage;
    struct subregion *srl_s;    /* the subregion carried by this node */
};
typedef STAILQ_HEAD(, subregionlist) subregionlisthead_t;
116
117 static walk_return_t
118 add_subregions_for_libent(
119 subregionlisthead_t *srlh,
120 const struct region *r,
121 const native_mach_header_t *mh,
122 const mach_vm_offset_t __unused mh_taddr, // address in target
123 const struct libent *le)
124 {
125 const struct load_command *lc = (const void *)(mh + 1);
126 mach_vm_offset_t objoff = le->le_objoff;
127 for (unsigned n = 0; n < mh->ncmds; n++) {
128
129 const native_segment_command_t *sc;
130
131 switch (lc->cmd) {
132 case NATIVE_LC_SEGMENT:
133 sc = (const void *)lc;
134
135 if (0 == sc->vmaddr && strcmp(sc->segname, SEG_PAGEZERO) == 0)
136 break;
137 mach_vm_offset_t lo = sc->vmaddr + objoff;
138 mach_vm_offset_t hi = lo + sc->vmsize;
139
140 /* Eliminate non-overlapping sections first */
141
142 if (R_ENDADDR(r) - 1 < lo)
143 break;
144 if (hi - 1 < R_ADDR(r))
145 break;
146
147 /*
148 * Some part of this segment is in the region.
149 * Trim the edges in the case where we span regions.
150 */
151 if (lo < R_ADDR(r))
152 lo = R_ADDR(r);
153 if (hi > R_ENDADDR(r))
154 hi = R_ENDADDR(r);
155
156 struct subregionlist *srl = calloc(1, sizeof (*srl));
157 struct subregion *s = new_subregion(lo, hi - lo, sc, le);
158 assert(sc->fileoff >= 0);
159 srl->srl_s = s;
160 STAILQ_INSERT_HEAD(srlh, srl, srl_linkage);
161
162 if (OPTIONS_DEBUG(opt, 2)) {
163 hsize_str_t hstr;
164 printr(r, "subregion %llx-%llx %7s %12s\t%s [%s off %lu for %lu nsects %u flags %x]\n",
165 S_ADDR(s), S_ENDADDR(s),
166 str_hsize(hstr, S_SIZE(s)),
167 sc->segname,
168 S_FILENAME(s),
169 str_prot(sc->initprot),
170 (unsigned long)sc->fileoff,
171 (unsigned long)sc->filesize,
172 sc->nsects, sc->flags);
173 }
174 break;
175 default:
176 break;
177 }
178 if (lc->cmdsize)
179 lc = (const void *)((caddr_t)lc + lc->cmdsize);
180 else
181 break;
182 }
183 return WALK_CONTINUE;
184 }
185
186 /*
187 * Because we aggregate information from multiple sources, there may
188 * be duplicate subregions. Eliminate them here.
189 *
 190  * Note that each library in the shared cache points
191 * separately at a single, unified (large!) __LINKEDIT section; these
192 * get removed here too.
193 *
194 * Assumes the subregion array is sorted by address!
195 */
196 static void
197 eliminate_duplicate_subregions(struct region *r)
198 {
199 unsigned i = 1;
200 while (i < r->r_nsubregions) {
201 struct subregion *s0 = r->r_subregions[i-1];
202 struct subregion *s1 = r->r_subregions[i];
203
204 if (S_ADDR(s0) != S_ADDR(s1) || S_SIZE(s0) != S_SIZE(s1)) {
205 i++;
206 continue;
207 }
208 if (memcmp(&s0->s_segcmd, &s1->s_segcmd, sizeof (s0->s_segcmd)) != 0) {
209 i++;
210 continue;
211 }
212 if (OPTIONS_DEBUG(opt, 3))
213 printr(r, "eliding duplicate %s subregion (%llx-%llx) file %s\n",
214 S_MACHO_TYPE(s1), S_ADDR(s1), S_ENDADDR(s1), S_FILENAME(s1));
215 /* If the duplicate subregions aren't mapping the same file (?), forget the name */
216 if (s0->s_libent != s1->s_libent)
217 s0->s_libent = s1->s_libent = NULL;
218 elide_subregion(r, i);
219 }
220 }
221
222 /*
223 * See if any of the dyld information we have can better describe this
224 * region of the target address space.
225 */
226 walk_return_t
227 decorate_memory_region(struct region *r, void *arg)
228 {
229 if (r->r_inzfodregion || r->r_incommregion)
230 return WALK_CONTINUE;
231
232 const dyld_process_info dpi = arg;
233
234 __block walk_return_t retval = WALK_CONTINUE;
235 __block subregionlisthead_t srlhead = STAILQ_HEAD_INITIALIZER(srlhead);
236
237 _dyld_process_info_for_each_image(dpi, ^(uint64_t __unused mhaddr, const uuid_t uuid, __unused const char *path) {
238 if (WALK_CONTINUE == retval) {
239 const struct libent *le = libent_lookup_byuuid(uuid);
240 assert(le->le_mhaddr == mhaddr);
241 bool shouldskip = false;
242 if (V_SIZE(&le->le_vr))
243 shouldskip = (R_ENDADDR(r) < V_ADDR(&le->le_vr) ||
244 R_ADDR(r) > V_ENDADDR(&le->le_vr));
245 if (!shouldskip)
246 retval = add_subregions_for_libent(&srlhead, r, le->le_mh, le->le_mhaddr, le);
247 }
248 });
249 if (WALK_CONTINUE != retval)
250 goto done;
251
252 /*
253 * Take the unsorted list of subregions, if any,
254 * and hang a sorted array of ranges on the region structure.
255 */
256 if (!STAILQ_EMPTY(&srlhead)) {
257 struct subregionlist *srl;
258 STAILQ_FOREACH(srl, &srlhead, srl_linkage) {
259 r->r_nsubregions++;
260 }
261 assert(r->r_nsubregions);
262
263 r->r_subregions = calloc(r->r_nsubregions, sizeof (void *));
264 unsigned i = 0;
265 STAILQ_FOREACH(srl, &srlhead, srl_linkage) {
266 r->r_subregions[i++] = srl->srl_s;
267 }
268 qsort_b(r->r_subregions, r->r_nsubregions, sizeof (void *),
269 ^(const void *a, const void *b) {
270 const struct subregion *lhs = *(struct subregion **)a;
271 const struct subregion *rhs = *(struct subregion **)b;
272 if (S_ADDR(lhs) > S_ADDR(rhs))
273 return 1;
274 if (S_ADDR(lhs) < S_ADDR(rhs))
275 return -1;
276 return 0;
277 });
278
279 eliminate_duplicate_subregions(r);
280
281 if (r->r_info.external_pager) {
282 /*
283 * Only very specific segment types get to be filerefs
284 */
285 for (i = 0; i < r->r_nsubregions; i++) {
286 struct subregion *s = r->r_subregions[i];
287 /*
288 * Anything marked writable is trivially disqualified; we're
289 * going to copy it anyway.
290 */
291 if (s->s_segcmd.initprot & VM_PROT_WRITE)
292 continue;
293
294 /* __TEXT and __LINKEDIT are our real targets */
295 if (!issubregiontype(s, SEG_TEXT) && !issubregiontype(s, SEG_LINKEDIT) && !issubregiontype(s, "__UNICODE")) {
296 if (OPTIONS_DEBUG(opt, 3)) {
297 hsize_str_t hstr;
298 printvr(S_RANGE(s), "skipping read-only %s segment %s\n", S_MACHO_TYPE(s), str_hsize(hstr, S_SIZE(s)));
299 }
300 continue;
301 }
302 if (r->r_insharedregion) {
303 /*
304 * Part of the shared region: things get more complicated.
305 */
306 if (r->r_fileref) {
307 /*
308 * There's a file reference here for the whole region.
309 * For __TEXT subregions, we could, in principle (though
310 * see below) generate references to the individual
311 * dylibs that dyld reports in the region. If the
312 * debugger could then use the __LINKEDIT info in the
313 * file, then we'd be done. But as long as the dump
314 * includes __LINKEDIT sections, we're going to
315 * end up generating a file reference to the combined
316 * __LINKEDIT section in the shared cache anyway, so
317 * we might as well do that for the __TEXT regions as
318 * well.
319 */
320 s->s_libent = r->r_fileref->fr_libent;
321 s->s_isuuidref = true;
322 } else {
323 /*
324 * If we get here, it's likely that the shared cache
325 * name can't be found e.g. update_dyld_shared_cache(1).
326 * For __TEXT subregions, we could generate refs to
327 * the individual dylibs, but note that the mach header
328 * and segment commands in memory are still pointing
329 * into the shared cache so any act of reconstruction
330 * is fiendishly complex. So copy it.
331 */
332 assert(!s->s_isuuidref);
333 }
334 } else {
335 /* Just a regular dylib? */
336 if (s->s_libent)
337 s->s_isuuidref = true;
338 }
339 }
340 }
341 }
342 assert(WALK_CONTINUE == retval);
343
344 done:
345 if (!STAILQ_EMPTY(&srlhead)) {
346 struct subregionlist *srl, *trl;
347 STAILQ_FOREACH_SAFE(srl, &srlhead, srl_linkage, trl) {
348 free(srl);
349 }
350 }
351 return retval;
352 }
353
354 /*
355 * Strip region of all decoration
356 *
357 * Invoked (on every region!) after an error during the initial
358 * 'decoration' phase to discard potentially incomplete information.
359 */
360 walk_return_t
361 undecorate_memory_region(struct region *r, __unused void *arg)
362 {
363 assert(&sparse_ops != r->r_op);
364 return r->r_nsubregions ? clean_subregions(r) : WALK_CONTINUE;
365 }
366
367 /*
368 * This optimization occurs -after- the vanilla_region_optimizations(),
369 * and -after- we've tagged zfod and first-pass fileref's.
370 */
/*
 * This optimization occurs -after- the vanilla_region_optimizations(),
 * and -after- we've tagged zfod and first-pass fileref's.
 *
 * Decide whether this region's subregion decoration is worth keeping:
 * regions already represented compactly (zfod, whole-region fileref)
 * are stripped of subregions; shared-region segments unclaimed by dyld
 * may be deleted outright; remaining adjacent subregions are merged;
 * and a region whose decoration adds nothing reverts to vanilla form.
 * Returns WALK_DELETE_REGION only for clean, unreferenced shared-cache
 * ranges; otherwise WALK_CONTINUE.
 */
walk_return_t
sparse_region_optimization(struct region *r, __unused void *arg)
{
    /* must not already be using the sparse representation */
    assert(&sparse_ops != r->r_op);

    if (r->r_inzfodregion) {
        /*
         * Pure zfod region: almost certainly a more compact
         * representation - keep it that way.
         */
        if (OPTIONS_DEBUG(opt, 3))
            printr(r, "retaining zfod region\n");
        assert(&zfod_ops == r->r_op);
        return clean_subregions(r);
    }

    if (r->r_insharedregion && 0 == r->r_nsubregions) {
        /*
         * A segment in the shared region needs to be
         * identified with an LC_SEGMENT that dyld claims,
         * otherwise (we assert) it's not useful to the dump.
         */
        if (OPTIONS_DEBUG(opt, 2)) {
            hsize_str_t hstr;
            printr(r, "not referenced in dyld info => "
                   "eliding %s range in shared region\n",
                   str_hsize(hstr, R_SIZE(r)));
        }
        /* only safe to drop if nothing in it was ever modified or swapped */
        if (0 == r->r_info.pages_dirtied && 0 == r->r_info.pages_swapped_out)
            return WALK_DELETE_REGION;
        if (OPTIONS_DEBUG(opt, 2)) {
            hsize_str_t hstr;
            printr(r, "dirty pages, but not referenced in dyld info => "
                   "NOT eliding %s range in shared region\n",
                   str_hsize(hstr, R_SIZE(r)));
        }
    }

    if (r->r_fileref) {
        /*
         * Already have a fileref for the whole region: already
         * a more compact representation - keep it that way.
         */
        if (OPTIONS_DEBUG(opt, 3))
            printr(r, "retaining fileref region\n");
        assert(&fileref_ops == r->r_op);
        return clean_subregions(r);
    }

    if (r->r_nsubregions > 1) {
        /*
         * Merge adjacent or identical subregions that have no file reference
         * (Reducing the number of subregions reduces header overhead and
         * improves compressability)
         */
        unsigned i = 1;
        while (i < r->r_nsubregions) {
            struct subregion *s0 = r->r_subregions[i-1];
            struct subregion *s1 = r->r_subregions[i];

            if (s0->s_isuuidref) {
                i++;
                continue; /* => destined to be a fileref */
            }
            if (!issamesubregiontype(s0, s1)) {
                i++;
                continue; /* merge-able subregions must have same "type" */
            }

            if (S_ENDADDR(s0) == S_ADDR(s1)) {
                /* directly adjacent subregions */
                if (OPTIONS_DEBUG(opt, 2))
                    printr(r, "merging subregions (%llx-%llx + %llx-%llx) -- adjacent\n",
                           S_ADDR(s0), S_ENDADDR(s0), S_ADDR(s1), S_ENDADDR(s1));
                /* grow s0 to absorb s1, then drop s1 from the array */
                S_SETSIZE(s0, S_ENDADDR(s1) - S_ADDR(s0));
                elide_subregion(r, i);
                continue;
            }

            /* first and last host page frame numbers of each subregion */
            const mach_vm_size_t pfn[2] = {
                S_ADDR(s0) >> pageshift_host,
                S_ADDR(s1) >> pageshift_host
            };
            const mach_vm_size_t endpfn[2] = {
                (S_ENDADDR(s0) - 1) >> pageshift_host,
                (S_ENDADDR(s1) - 1) >> pageshift_host
            };

            if (pfn[0] == pfn[1] && pfn[0] == endpfn[0] && pfn[0] == endpfn[1]) {
                /* two small subregions share a host page */
                if (OPTIONS_DEBUG(opt, 2))
                    printr(r, "merging subregions (%llx-%llx + %llx-%llx) -- same page\n",
                           S_ADDR(s0), S_ENDADDR(s0), S_ADDR(s1), S_ENDADDR(s1));
                S_SETSIZE(s0, S_ENDADDR(s1) - S_ADDR(s0));
                elide_subregion(r, i);
                continue;
            }

            if (pfn[1] == 1 + endpfn[0]) {
                /* subregions are pagewise-adjacent: bigger chunks to compress */
                if (OPTIONS_DEBUG(opt, 2))
                    printr(r, "merging subregions (%llx-%llx + %llx-%llx) -- adjacent pages\n",
                           S_ADDR(s0), S_ENDADDR(s0), S_ADDR(s1), S_ENDADDR(s1));
                S_SETSIZE(s0, S_ENDADDR(s1) - S_ADDR(s0));
                elide_subregion(r, i);
                continue;
            }

            i++; /* this isn't the subregion we're looking for */
        }
    }

    if (1 == r->r_nsubregions) {
        /* a lone subregion spanning the whole region adds no information */
        struct subregion *s = r->r_subregions[0];
        if (!s->s_isuuidref &&
            R_ADDR(r) == S_ADDR(s) && R_ENDADDR(r) == S_ENDADDR(s)) {
            if (OPTIONS_DEBUG(opt, 3))
                printr(r, "subregion (%llx-%llx) reverts to region\n",
                       S_ADDR(s), S_ENDADDR(s));
            return clean_subregions(r);
        }
    }

    /* any surviving decoration switches the region to the sparse ops */
    if (r->r_nsubregions)
        r->r_op = &sparse_ops;

    return WALK_CONTINUE;
}