/*
 * Copyright (C) 2008-2011 Philippe Gerum <rpm@xenomai.org>.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 *
 * Thread object abstraction.
 */
#include <signal.h>
#include <memory.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <fcntl.h>
#include <assert.h>
#include <limits.h>
#include <sched.h>
#include "boilerplate/signal.h"
#include "boilerplate/atomic.h"
#include "boilerplate/lock.h"
#include "copperplate/traceobj.h"
#include "copperplate/threadobj.h"
#include "copperplate/syncobj.h"
#include "copperplate/cluster.h"
#include "copperplate/clockobj.h"
#include "copperplate/eventobj.h"
#include "copperplate/heapobj.h"
#include "internal.h"

union copperplate_wait_union {
	struct syncluster_wait_struct syncluster_wait;
	struct eventobj_wait_struct eventobj_wait;
};

union main_wait_union {
	union copperplate_wait_union copperplate_wait;
	char untyped_wait[1024];
};

static void finalize_thread(void *p);

static void set_global_priority(struct threadobj *thobj, int policy,
				const struct sched_param_ex *param_ex);

static int request_setschedparam(struct threadobj *thobj, int policy,
				 const struct sched_param_ex *param_ex);

static int request_cancel(struct threadobj *thobj);

static sigset_t sigperiod_set;

static int threadobj_agent_prio;

int threadobj_high_prio;

int threadobj_irq_prio;

#ifdef HAVE_TLS
__thread __attribute__ ((tls_model (CONFIG_XENO_TLS_MODEL)))
struct threadobj *__threadobj_current;
#endif

/*
 * We need the thread object key regardless of whether TLS is
 * available to us, to run the thread finalizer routine.
 */
pthread_key_t threadobj_tskey;

void threadobj_init_key(void)
{
	if (pthread_key_create(&threadobj_tskey, finalize_thread))
		early_panic("failed to allocate TSD key");
}

#ifdef CONFIG_XENO_PSHARED

static pid_t agent_pid;

#define RMT_SETSCHED	0
#define RMT_CANCEL	1

struct remote_cancel {
	pthread_t ptid;
	int policy;
	struct sched_param_ex param_ex;
};

struct remote_setsched {
	pthread_t ptid;
	int policy;
	struct sched_param_ex param_ex;
};

struct remote_request {
	int req;	/* RMT_xx */
	union {
		struct remote_cancel cancel;
		struct remote_setsched setsched;
	} u;
};

static int agent_prologue(void *arg)
{
	agent_pid = get_thread_pid();
	copperplate_set_current_name("remote-agent");
	threadobj_set_current(THREADOBJ_IRQCONTEXT);

	return 0;
}

static void *agent_loop(void *arg)
{
	struct remote_request *rq;
	siginfo_t si;
	sigset_t set;
	int sig, ret;

	sigemptyset(&set);
	sigaddset(&set, SIGAGENT);

	for (;;) {
		sig = __RT(sigwaitinfo(&set, &si));
		if (sig < 0) {
			if (errno == EINTR)
				continue;
			panic("agent thread cannot wait for request, %s",
			      symerror(-errno));
		}
		rq = si.si_ptr;
		switch (rq->req) {
		case RMT_SETSCHED:
			ret = copperplate_renice_local_thread(rq->u.setsched.ptid,
							      rq->u.setsched.policy,
							      &rq->u.setsched.param_ex);
			break;
		case RMT_CANCEL:
			if (rq->u.cancel.policy != -1)
				copperplate_renice_local_thread(rq->u.cancel.ptid,
								rq->u.cancel.policy,
								&rq->u.cancel.param_ex);
			ret = pthread_cancel(rq->u.cancel.ptid);
			break;
		default:
			panic("invalid remote request #%d", rq->req);
		}
		if (ret)
			warning("remote request #%d failed, %s",
				rq->req, symerror(ret));
		xnfree(rq);
	}

	return NULL;
}

static inline int send_agent(struct threadobj *thobj,
			     struct remote_request *rq)
{
	union sigval val = { .sival_ptr = rq };

	/*
	 * We are not supposed to issue remote requests when nobody
	 * else may share our session.
	 */
	assert(agent_pid != 0);

	/*
	 * XXX: No backtracing, may legitimately fail if the remote
	 * process goes away (hopefully cleanly). However, the request
	 * blocks attached to unprocessed pending signals may leak, as
	 * requests are fully asynchronous. Fortunately, processes
	 * creating user threads are unlikely to ungracefully leave
	 * the session they belong to intentionally.
	 */
	return __RT(sigqueue(agent_pid, SIGAGENT, val));
}
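
/*
 * Typical use of the agent channel (a sketch mirroring
 * request_setschedparam() below): the requester pulls a request
 * block from the session heap, fills it in, then hands it over to
 * the agent. Ownership of the block is transferred on success, so
 * the requester only frees it when queuing fails:
 *
 *	struct remote_request *rq = xnmalloc(sizeof(*rq));
 *	if (rq == NULL)
 *		return -ENOMEM;
 *	rq->req = RMT_SETSCHED;
 *	rq->u.setsched.ptid = thobj->ptid;
 *	rq->u.setsched.policy = policy;
 *	rq->u.setsched.param_ex = *param_ex;
 *	ret = __bt(send_agent(thobj, rq));
 *	if (ret)
 *		xnfree(rq);
 */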

static void start_agent(void)
{
	struct corethread_attributes cta;
	pthread_t ptid;
	sigset_t set;
	int ret;

	/*
	 * CAUTION: we expect all internal/user threads created by
	 * Copperplate to inherit this signal mask, otherwise
	 * sigqueue(SIGAGENT) might be delivered to the wrong
	 * thread. So make sure the agent support is set up early
	 * enough.
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGAGENT);
	pthread_sigmask(SIG_BLOCK, &set, NULL);

	cta.policy = threadobj_agent_prio ? SCHED_CORE : SCHED_OTHER;
	cta.param_ex.sched_priority = threadobj_agent_prio;
	cta.prologue = agent_prologue;
	cta.run = agent_loop;
	cta.arg = NULL;
	cta.stacksize = PTHREAD_STACK_DEFAULT;
	cta.detachstate = PTHREAD_CREATE_DETACHED;

	ret = copperplate_create_thread(&cta, &ptid);
	if (ret)
		panic("failed to start agent thread, %s", symerror(ret));
}

#else  /* !CONFIG_XENO_PSHARED */

static inline void start_agent(void)
{
	/* No agent in private (process-local) session. */
}

#endif /* !CONFIG_XENO_PSHARED */

#ifdef CONFIG_XENO_COBALT

#include "cobalt/internal.h"

static inline void pkg_init_corespec(void)
{
	/*
	 * We must have CAP_SYS_NICE since we reached this code either
	 * as root or as a member of the allowed group, as a result of
	 * binding the current process to the Cobalt core earlier in
	 * libcobalt's setup code.
	 */
	threadobj_irq_prio = sched_get_priority_max_ex(SCHED_CORE);
	threadobj_high_prio = sched_get_priority_max_ex(SCHED_FIFO);
	threadobj_agent_prio = threadobj_high_prio;
}

static inline int threadobj_init_corespec(struct threadobj *thobj)
{
	return 0;
}

static inline void threadobj_uninit_corespec(struct threadobj *thobj)
{
}

#ifdef CONFIG_XENO_PSHARED

static inline int threadobj_setup_corespec(struct threadobj *thobj)
{
	thobj->core.handle = cobalt_get_current();
	thobj->core.u_winoff = (void *)cobalt_get_current_window() -
		cobalt_umm_shared;

	return 0;
}

#else /* !CONFIG_XENO_PSHARED */

static inline int threadobj_setup_corespec(struct threadobj *thobj)
{
	thobj->core.handle = cobalt_get_current();
	thobj->core.u_window = cobalt_get_current_window();

	return 0;
}

#endif /* !CONFIG_XENO_PSHARED */

static inline void threadobj_cleanup_corespec(struct threadobj *thobj)
{
}

static inline void threadobj_run_corespec(struct threadobj *thobj)
{
	cobalt_thread_harden();
}

static inline void threadobj_cancel_1_corespec(struct threadobj *thobj) /* thobj->lock held */
{
}

static inline void threadobj_cancel_2_corespec(struct threadobj *thobj) /* thobj->lock held */
{
	/*
	 * Send a SIGDEMT signal to demote the target thread, to make
	 * sure pthread_cancel() will be effective asap.
	 *
	 * In effect, the thread is kicked out of any blocking
	 * syscall, a relax is forced on it (via a mayday trap if
	 * required), and it is then required to leave the real-time
	 * scheduling class.
	 *
	 * - this makes sure the thread returns with EINTR from the
	 * syscall then hits a cancellation point asap.
	 *
	 * - this ensures that the thread can receive the cancellation
	 * signal in case asynchronous cancellation is enabled and get
	 * kicked out from syscall-less code in primary mode
	 * (e.g. busy loops).
	 *
	 * - this makes sure the thread won't preempt the caller
	 * indefinitely when resuming due to priority enforcement
	 * (i.e. when the target thread has higher Xenomai priority
	 * than the caller of threadobj_cancel()), but will receive
	 * the following cancellation request asap.
	 */
	__RT(kill(thobj->pid, SIGDEMT));
}

int threadobj_suspend(struct threadobj *thobj) /* thobj->lock held */
{
	pid_t pid = thobj->pid;
	int ret;

	__threadobj_check_locked(thobj);

	if (thobj->status & __THREAD_S_SUSPENDED)
		return 0;

	thobj->status |= __THREAD_S_SUSPENDED;
	if (thobj == threadobj_current()) {
		threadobj_unlock(thobj);
		ret = __RT(kill(pid, SIGSUSP));
		threadobj_lock(thobj);
	} else
		ret = __RT(kill(pid, SIGSUSP));

	return __bt(-ret);
}

int threadobj_resume(struct threadobj *thobj) /* thobj->lock held */
{
	int ret;

	__threadobj_check_locked(thobj);

	if ((thobj->status & __THREAD_S_SUSPENDED) == 0)
		return 0;

	thobj->status &= ~__THREAD_S_SUSPENDED;
	ret = __RT(kill(thobj->pid, SIGRESM));

	return __bt(-ret);
}
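
/*
 * Caller-side sketch (illustrative, not lifted from a particular
 * skin): both services above expect thobj->lock to be held on entry,
 * so a typical caller brackets them with the thread lock:
 *
 *	ret = threadobj_lock(thobj);
 *	if (ret)
 *		return ret;
 *	ret = threadobj_suspend(thobj);
 *	threadobj_unlock(thobj);
 */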

static inline int threadobj_unblocked_corespec(struct threadobj *current)
{
	return (threadobj_get_window(&current->core)->info & XNBREAK) != 0;
}

int __threadobj_lock_sched(struct threadobj *current)
{
	if (current->schedlock_depth++ > 0)
		return 0;

	/*
	 * In essence, we can't be scheduled out as a result of
	 * locking the scheduler, so no need to drop the thread lock
	 * across this call.
	 */
	return __bt(-pthread_setmode_np(0, PTHREAD_LOCK_SCHED, NULL));
}

int threadobj_lock_sched(void)
{
	struct threadobj *current = threadobj_current();

	/* This call is lock-free over Cobalt. */
	return __bt(__threadobj_lock_sched(current));
}

int __threadobj_unlock_sched(struct threadobj *current)
{
	/*
	 * Higher layers may not know about the current scheduler
	 * locking level and fully rely on us to track it, so we
	 * gracefully handle unbalanced calls here, and let them
	 * decide of the outcome in case of error.
	 */
	if (current->schedlock_depth == 0)
		return __bt(-EINVAL);

	if (--current->schedlock_depth > 0)
		return 0;

	return __bt(-pthread_setmode_np(PTHREAD_LOCK_SCHED, 0, NULL));
}

int threadobj_unlock_sched(void)
{
	struct threadobj *current = threadobj_current();

	/* This call is lock-free over Cobalt. */
	return __bt(__threadobj_unlock_sched(current));
}
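
/*
 * The scheduler lock nests by design: schedlock_depth counts
 * recursive acquisitions, and only the outermost pair reaches the
 * core. For instance:
 *
 *	threadobj_lock_sched();    depth 0 -> 1, PTHREAD_LOCK_SCHED set
 *	threadobj_lock_sched();    depth 1 -> 2, no core call
 *	threadobj_unlock_sched();  depth 2 -> 1, no core call
 *	threadobj_unlock_sched();  depth 1 -> 0, PTHREAD_LOCK_SCHED cleared
 */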

int threadobj_set_mode(int clrmask, int setmask, int *mode_r) /* current->lock held */
{
	struct threadobj *current = threadobj_current();
	int __clrmask = 0, __setmask = 0;

	__threadobj_check_locked(current);

	if (setmask & __THREAD_M_WARNSW)
		__setmask |= PTHREAD_WARNSW;
	else if (clrmask & __THREAD_M_WARNSW)
		__clrmask |= PTHREAD_WARNSW;

	if (setmask & __THREAD_M_CONFORMING)
		__setmask |= PTHREAD_CONFORMING;
	else if (clrmask & __THREAD_M_CONFORMING)
		__clrmask |= PTHREAD_CONFORMING;

	if (setmask & __THREAD_M_LOCK)
		__threadobj_lock_sched_once(current);
	else if (clrmask & __THREAD_M_LOCK)
		__threadobj_unlock_sched(current);

	if (mode_r || __setmask || __clrmask)
		return __bt(-pthread_setmode_np(__clrmask, __setmask, mode_r));

	return 0;
}
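
/*
 * Usage sketch (illustrative flag combination): the caller holds the
 * current thread lock and passes __THREAD_M_* bits, which are
 * translated to the matching PTHREAD_* mode bits for the core:
 *
 *	threadobj_lock(current);
 *	ret = threadobj_set_mode(0, __THREAD_M_LOCK|__THREAD_M_WARNSW, NULL);
 *	threadobj_unlock(current);
 */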

static inline int map_priority_corespec(int policy,
					const struct sched_param_ex *param_ex)
{
	int prio;

	prio = cobalt_sched_weighted_prio(policy, param_ex);
	assert(prio >= 0);

	return prio;
}

static inline int prepare_rr_corespec(struct threadobj *thobj, int policy,
				      const struct sched_param_ex *param_ex) /* thobj->lock held */
{
	return policy;
}

static inline int enable_rr_corespec(struct threadobj *thobj,
				     const struct sched_param_ex *param_ex) /* thobj->lock held */
{
	return 0;
}

static inline void disable_rr_corespec(struct threadobj *thobj) /* thobj->lock held */
{
	/* nop */
}

int threadobj_stat(struct threadobj *thobj, struct threadobj_stat *p) /* thobj->lock held */
{
	struct cobalt_threadstat stat;
	int ret;

	__threadobj_check_locked(thobj);

	ret = cobalt_thread_stat(thobj->pid, &stat);
	if (ret)
		return __bt(ret);

	p->cpu = stat.cpu;
	p->status = stat.status;
	p->xtime = stat.xtime;
	p->msw = stat.msw;
	p->csw = stat.csw;
	p->xsc = stat.xsc;
	p->pf = stat.pf;
	p->timeout = stat.timeout;
	p->schedlock = thobj->schedlock_depth;

	return 0;
}

#else /* CONFIG_XENO_MERCURY */

static int threadobj_lock_prio;

static void unblock_sighandler(int sig)
{
	struct threadobj *current = threadobj_current();

	/*
	 * SIGRELS is thread-directed, so referring to
	 * current->run_state locklessly is safe as we are
	 * basically introspecting.
	 */
	if (current->run_state == __THREAD_S_DELAYED)
		current->run_state = __THREAD_S_BREAK;
}

static void roundrobin_handler(int sig)
{
	/*
	 * We do manual round-robin over SCHED_FIFO to allow for
	 * multiple arbitrary time slices (i.e. vs the kernel
	 * pre-defined and fixed one).
	 */
	sched_yield();
}

static void sleep_suspended(void)
{
	sigset_t set;

	/*
	 * A suspended thread is supposed to do nothing but wait for
	 * the wake up signal, so we may happily block all signals but
	 * SIGRESM. Note that SIGRRB won't be accumulated during the
	 * sleep time anyhow, as the round-robin timer is based on
	 * CLOCK_THREAD_CPUTIME_ID, and we obviously won't consume
	 * any CPU time while blocked.
	 */
	sigfillset(&set);
	sigdelset(&set, SIGRESM);
	sigsuspend(&set);
}

static void suspend_sighandler(int sig)
{
	sleep_suspended();
}

static void nop_sighandler(int sig)
{
	/* nop */
}

static inline void pkg_init_corespec(void)
{
	struct sigaction sa;

	/*
	 * We don't have builtin scheduler-lock feature over Mercury,
	 * so we emulate it by reserving the highest thread priority
	 * level from the SCHED_FIFO class to disable involuntary
	 * preemption.
	 *
	 * NOTE: The remote agent thread will also run with the
	 * highest thread priority level (threadobj_agent_prio) in
	 * shared multi-processing mode, which won't affect any thread
	 * holding the scheduler lock, unless the latter has to block
	 * for some reason, defeating the purpose of such lock anyway.
	 */
	threadobj_irq_prio = sched_get_priority_max(SCHED_FIFO);
	threadobj_lock_prio = threadobj_irq_prio - 1;
	threadobj_high_prio = threadobj_irq_prio - 2;
	threadobj_agent_prio = threadobj_high_prio;
	/*
	 * We allow a non-privileged process to start a low priority
	 * agent thread only, on the assumption that it lacks
	 * CAP_SYS_NICE, but this is pretty much the maximum extent of
	 * our abilities for such processes. Other internal threads
	 * requiring SCHED_CORE/FIFO scheduling such as the timer
	 * manager won't start properly, therefore the corresponding
	 * services won't be available.
	 */
	if (geteuid())
		threadobj_agent_prio = 0;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = unblock_sighandler;
	sa.sa_flags = SA_RESTART;
	sigaction(SIGRELS, &sa, NULL);
	sa.sa_handler = roundrobin_handler;
	sigaction(SIGRRB, &sa, NULL);
	sa.sa_handler = suspend_sighandler;
	sigaction(SIGSUSP, &sa, NULL);
	sa.sa_handler = nop_sighandler;
	sigaction(SIGRESM, &sa, NULL);
	sigaction(SIGPERIOD, &sa, NULL);
}
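
/*
 * For instance, with the usual Linux SCHED_FIFO range (1..99), the
 * reservation above yields threadobj_irq_prio == 99,
 * threadobj_lock_prio == 98 and threadobj_high_prio == 97: a thread
 * holding the emulated scheduler lock outranks every regular
 * Copperplate thread, while still yielding to the pseudo-IRQ level.
 */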

static inline int threadobj_init_corespec(struct threadobj *thobj)
{
	pthread_condattr_t cattr;
	int ret;

	thobj->core.rr_timer = NULL;
	/*
	 * Over Mercury, we need an additional per-thread condvar to
	 * implement the complex monitor for the syncobj abstraction.
	 */
	pthread_condattr_init(&cattr);
	pthread_condattr_setpshared(&cattr, mutex_scope_attribute);
	ret = __bt(-pthread_condattr_setclock(&cattr, CLOCK_COPPERPLATE));
	if (ret)
		warning("failed setting condvar clock, %s"
			"(try --disable-clock-monotonic-raw)",
			symerror(ret));
	else
		ret = __bt(-pthread_cond_init(&thobj->core.grant_sync, &cattr));
	pthread_condattr_destroy(&cattr);

#ifdef CONFIG_XENO_WORKAROUND_CONDVAR_PI
	thobj->core.policy_unboosted = -1;
#endif
	return ret;
}

static inline void threadobj_uninit_corespec(struct threadobj *thobj)
{
	pthread_cond_destroy(&thobj->core.grant_sync);
}

static inline int threadobj_setup_corespec(struct threadobj *thobj)
{
	struct sigevent sev;
	sigset_t set;
	int ret;

	/*
	 * Do the per-thread setup for supporting the suspend/resume
	 * actions over Mercury. We have two basic requirements for
	 * this mechanism:
	 *
	 * - suspension requests must be handled asap, regardless of
	 * what the target thread is doing when notified (syscall
	 * wait, pure runtime etc.), hence the use of signals.
	 *
	 * - we must process the suspension signal on behalf of the
	 * target thread, as we want that thread to block upon
	 * receipt.
	 *
	 * In addition, we block the periodic signal, which we only
	 * want to receive from within threadobj_wait_period().
	 */
	sigemptyset(&set);
	sigaddset(&set, SIGRESM);
	sigaddset(&set, SIGPERIOD);
	pthread_sigmask(SIG_BLOCK, &set, NULL);

	/*
	 * Create the per-thread round-robin timer.
	 */
	memset(&sev, 0, sizeof(sev));
	sev.sigev_signo = SIGRRB;
	sev.sigev_notify = SIGEV_SIGNAL|SIGEV_THREAD_ID;
	sev.sigev_notify_thread_id = threadobj_get_pid(thobj);
	ret = timer_create(CLOCK_THREAD_CPUTIME_ID, &sev,
			   &thobj->core.rr_timer);
	if (ret)
		return __bt(-errno);

	return 0;
}

static inline void threadobj_cleanup_corespec(struct threadobj *thobj)
{
	if (thobj->core.rr_timer)
		timer_delete(thobj->core.rr_timer);
}

static inline void threadobj_run_corespec(struct threadobj *thobj)
{
}

static inline void threadobj_cancel_1_corespec(struct threadobj *thobj) /* thobj->lock held */
{
	/*
	 * If the target thread we are about to cancel gets suspended
	 * while it is currently warming up, we have to unblock it
	 * from sleep_suspended(), so that we don't get stuck in
	 * cancel_sync(), waiting for a warmed up state which will
	 * never come.
	 *
	 * Just send it SIGRESM unconditionally, this will either
	 * unblock it if the thread waits in sleep_suspended(), or
	 * lead to a nop since that signal is blocked otherwise.
	 */
	copperplate_kill_tid(thobj->pid, SIGRESM);
}

static inline void threadobj_cancel_2_corespec(struct threadobj *thobj) /* thobj->lock held */
{
}

int threadobj_suspend(struct threadobj *thobj) /* thobj->lock held */
{
	__threadobj_check_locked(thobj);

	if (thobj == threadobj_current()) {
		thobj->status |= __THREAD_S_SUSPENDED;
		threadobj_unlock(thobj);
		sleep_suspended();
		threadobj_lock(thobj);
	} else if ((thobj->status & __THREAD_S_SUSPENDED) == 0) {
		/*
		 * We prevent suspension requests from cumulating, so
		 * that we always have a flat, consistent sequence of
		 * alternate suspend/resume events. It's up to the
		 * client code to handle nested requests if need be.
		 */
		thobj->status |= __THREAD_S_SUSPENDED;
		copperplate_kill_tid(thobj->pid, SIGSUSP);
	}

	return 0;
}

int threadobj_resume(struct threadobj *thobj) /* thobj->lock held */
{
	__threadobj_check_locked(thobj);

	if (thobj != threadobj_current() &&
	    (thobj->status & __THREAD_S_SUSPENDED) != 0) {
		thobj->status &= ~__THREAD_S_SUSPENDED;
		/*
		 * We prevent resumption requests from cumulating. See
		 * threadobj_suspend().
		 */
		copperplate_kill_tid(thobj->pid, SIGRESM);
	}

	return 0;
}

static inline int threadobj_unblocked_corespec(struct threadobj *current)
{
	return current->run_state != __THREAD_S_DELAYED;
}

int __threadobj_lock_sched(struct threadobj *current) /* current->lock held */
{
	struct sched_param_ex param_ex;
	int ret;

	__threadobj_check_locked(current);

	if (current->schedlock_depth > 0)
		goto done;

	current->core.schedparam_unlocked = current->schedparam;
	current->core.policy_unlocked = current->policy;
	param_ex.sched_priority = threadobj_lock_prio;
	ret = threadobj_set_schedparam(current, SCHED_FIFO, &param_ex);
	if (ret)
		return __bt(ret);
done:
	current->schedlock_depth++;

	return 0;
}

int threadobj_lock_sched(void)
{
	struct threadobj *current = threadobj_current();
	int ret;

	threadobj_lock(current);
	ret = __threadobj_lock_sched(current);
	threadobj_unlock(current);

	return __bt(ret);
}

int __threadobj_unlock_sched(struct threadobj *current) /* current->lock held */
{
	__threadobj_check_locked(current);

	if (current->schedlock_depth == 0)
		return __bt(-EINVAL);

	if (--current->schedlock_depth > 0)
		return 0;

	return __bt(threadobj_set_schedparam(current,
					     current->core.policy_unlocked,
					     &current->core.schedparam_unlocked));
}

int threadobj_unlock_sched(void)
{
	struct threadobj *current = threadobj_current();
	int ret;

	threadobj_lock(current);
	ret = __threadobj_unlock_sched(current);
	threadobj_unlock(current);

	return __bt(ret);
}

int threadobj_set_mode(int clrmask, int setmask, int *mode_r) /* current->lock held */
{
	struct threadobj *current = threadobj_current();
	int ret = 0, old = 0;

	__threadobj_check_locked(current);

	if (current->schedlock_depth > 0)
		old |= __THREAD_M_LOCK;

	if (setmask & __THREAD_M_LOCK) {
		ret = __threadobj_lock_sched_once(current);
		if (ret == -EBUSY)
			ret = 0;
	} else if (clrmask & __THREAD_M_LOCK)
		__threadobj_unlock_sched(current);

	if (mode_r)
		*mode_r = old;

	return __bt(ret);
}

static inline int map_priority_corespec(int policy,
					const struct sched_param_ex *param_ex)
{
	return param_ex->sched_priority;
}

static inline int prepare_rr_corespec(struct threadobj *thobj, int policy,
				      const struct sched_param_ex *param_ex) /* thobj->lock held */
{
	return SCHED_FIFO;
}

static int enable_rr_corespec(struct threadobj *thobj,
			      const struct sched_param_ex *param_ex) /* thobj->lock held */
{
	struct itimerspec value;
	int ret;

	value.it_interval = param_ex->sched_rr_quantum;
	value.it_value = value.it_interval;
	ret = timer_settime(thobj->core.rr_timer, 0, &value, NULL);
	if (ret)
		return __bt(-errno);

	return 0;
}

static void disable_rr_corespec(struct threadobj *thobj) /* thobj->lock held */
{
	struct itimerspec value;

	value.it_value.tv_sec = 0;
	value.it_value.tv_nsec = 0;
	value.it_interval = value.it_value;
	timer_settime(thobj->core.rr_timer, 0, &value, NULL);
}

int threadobj_stat(struct threadobj *thobj,
		   struct threadobj_stat *stat) /* thobj->lock held */
{
	char procstat[64], buf[BUFSIZ], *p;
	struct timespec now, delta;
	FILE *fp;
	int n;

	__threadobj_check_locked(thobj);

	snprintf(procstat, sizeof(procstat), "/proc/%d/stat", thobj->pid);
	fp = fopen(procstat, "r");
	if (fp == NULL)
		return -EINVAL;

	p = fgets(buf, sizeof(buf), fp);
	fclose(fp);

	if (p == NULL)
		return -EIO;

	p += strlen(buf);
	for (n = 0; n < 14; n++) {
		while (*--p != ' ') {
			if (p <= buf)
				return -EINVAL;
		}
	}

	stat->cpu = atoi(++p);
	stat->status = threadobj_get_status(thobj);

	if (thobj->run_state & (__THREAD_S_TIMEDWAIT|__THREAD_S_DELAYED)) {
		__RT(clock_gettime(CLOCK_COPPERPLATE, &now));
		timespec_sub(&delta, &thobj->core.timeout, &now);
		stat->timeout = timespec_scalar(&delta);
		/*
		 * The timeout might fire as we are calculating the
		 * delta: sanitize any negative value as 1.
		 */
		if ((sticks_t)stat->timeout < 0)
			stat->timeout = 1;
	} else
		stat->timeout = 0;

	stat->schedlock = thobj->schedlock_depth;

	return 0;
}

#ifdef CONFIG_XENO_WORKAROUND_CONDVAR_PI

/*
 * This workaround does NOT deal with concurrent updates of the caller
 * priority by other threads while the former is boosted. If your code
 * depends so much on strict PI to fix up CPU starvation, but you
 * insist on using a broken glibc that does not implement PI properly
 * nevertheless, then you have to refrain from issuing
 * pthread_setschedparam() for threads which might be currently
 * boosted.
 */
static void __threadobj_boost(void)
{
	struct threadobj *current = threadobj_current();
	struct sched_param param = {
		.sched_priority = threadobj_irq_prio, /* Highest one. */
	};
	int ret;

	if (current == NULL)	/* IRQ or invalid context */
		return;

	if (current->schedlock_depth > 0) {
		current->core.policy_unboosted = SCHED_FIFO;
		current->core.schedparam_unboosted.sched_priority = threadobj_lock_prio;
	} else {
		current->core.policy_unboosted = current->policy;
		current->core.schedparam_unboosted = current->schedparam;
	}
	compiler_barrier();

	ret = pthread_setschedparam(current->ptid, SCHED_FIFO, &param);
	if (ret) {
		current->core.policy_unboosted = -1;
		warning("thread boost failed, %s", symerror(-ret));
	}
}

static void __threadobj_unboost(void)
{
	struct threadobj *current = threadobj_current();
	struct sched_param param;
	int ret;

	if (current == NULL) 	/* IRQ or invalid context */
		return;

	param.sched_priority = current->core.schedparam_unboosted.sched_priority;

	ret = pthread_setschedparam(current->ptid,
				    current->core.policy_unboosted, &param);
	if (ret)
		warning("thread unboost failed, %s", symerror(-ret));

	current->core.policy_unboosted = -1;
}

int threadobj_cond_timedwait(pthread_cond_t *cond,
			     pthread_mutex_t *lock,
			     const struct timespec *timeout)
{
	int ret;

	__threadobj_boost();
	ret = pthread_cond_timedwait(cond, lock, timeout);
	__threadobj_unboost();

	return ret;
}

int threadobj_cond_wait(pthread_cond_t *cond,
			pthread_mutex_t *lock)
{
	int ret;

	__threadobj_boost();
	ret = pthread_cond_wait(cond, lock);
	__threadobj_unboost();

	return ret;
}

int threadobj_cond_signal(pthread_cond_t *cond)
{
	int ret;

	__threadobj_boost();
	ret = pthread_cond_signal(cond);
	__threadobj_unboost();

	return ret;
}

int threadobj_cond_broadcast(pthread_cond_t *cond)
{
	int ret;

	__threadobj_boost();
	ret = pthread_cond_broadcast(cond);
	__threadobj_unboost();

	return ret;
}
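
/*
 * Usage sketch: Copperplate callers (e.g. the syncobj monitor) are
 * expected to go through these wrappers instead of the raw pthread
 * condvar services, so the waiter runs at the top SCHED_FIFO
 * priority across the condvar call and is restored to its previous
 * parameters afterwards:
 *
 *	__RT(pthread_mutex_lock(&lock));
 *	while (!condition)
 *		threadobj_cond_wait(&cond, &lock);
 *	__RT(pthread_mutex_unlock(&lock));
 *
 * (lock, cond and condition stand for the caller's own monitor
 * state.)
 */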

#endif /* !CONFIG_XENO_WORKAROUND_CONDVAR_PI */

#endif /* CONFIG_XENO_MERCURY */

static int request_setschedparam(struct threadobj *thobj, int policy,
				 const struct sched_param_ex *param_ex)
{				/* thobj->lock held */
	int ret;

#ifdef CONFIG_XENO_PSHARED
	struct remote_request *rq;

	if (unlikely(!threadobj_local_p(thobj))) {
		rq = xnmalloc(sizeof(*rq));
		if (rq == NULL)
			return -ENOMEM;

		rq->req = RMT_SETSCHED;
		rq->u.setsched.ptid = thobj->ptid;
		rq->u.setsched.policy = policy;
		rq->u.setsched.param_ex = *param_ex;

		ret = __bt(send_agent(thobj, rq));
		if (ret)
			xnfree(rq);
		return ret;
	}
#endif
	/*
	 * We must drop the lock temporarily across the setsched
	 * operation, as libcobalt may switch us to secondary mode
	 * when doing so (i.e. libc call to reflect the new priority
	 * on the linux side).
	 *
	 * If we can't relock the target thread, this must mean that
	 * it vanished in the meantime: return -EIDRM for the caller
	 * to handle this case specifically.
	 */
	threadobj_unlock(thobj);
	ret = copperplate_renice_local_thread(thobj->ptid, policy, param_ex);
	if (threadobj_lock(thobj))
		ret = -EIDRM;

	return ret;
}

static int request_cancel(struct threadobj *thobj) /* thobj->lock held, dropped. */
{
	struct threadobj *current = threadobj_current();
	int thprio = thobj->global_priority;
	pthread_t ptid = thobj->ptid;
#ifdef CONFIG_XENO_PSHARED
	struct remote_request *rq;
	int ret;

	if (unlikely(!threadobj_local_p(thobj))) {
		threadobj_unlock(thobj);
		rq = xnmalloc(sizeof(*rq));
		if (rq == NULL)
			return -ENOMEM;

		rq->req = RMT_CANCEL;
		rq->u.cancel.ptid = ptid;
		rq->u.cancel.policy = -1;
		if (current) {
			rq->u.cancel.policy = current->policy;
			rq->u.cancel.param_ex = current->schedparam;
		}
		ret = __bt(send_agent(thobj, rq));
		if (ret)
			xnfree(rq);
		return ret;
	}
#endif
	threadobj_unlock(thobj);

	/*
	 * The caller will have to wait for the killed thread to enter
	 * its finalizer, so we boost the latter thread to prevent a
	 * priority inversion if need be.
	 *
	 * NOTE: Since we dropped the lock, we might race if ptid
	 * disappears while we are busy killing it, glibc will check
	 * and dismiss if so.
	 */

	if (current && thprio < current->global_priority)
		copperplate_renice_local_thread(ptid, current->policy,
						&current->schedparam);
	pthread_cancel(ptid);

	return 0;
}

void *__threadobj_alloc(size_t tcb_struct_size,
			size_t wait_union_size,
			int thobj_offset)
{
	struct threadobj *thobj;
	void *p;

	if (wait_union_size < sizeof(union copperplate_wait_union))
		wait_union_size = sizeof(union copperplate_wait_union);

	tcb_struct_size = (tcb_struct_size+sizeof(double)-1) & ~(sizeof(double)-1);
	p = xnmalloc(tcb_struct_size + wait_union_size);
	if (p == NULL)
		return NULL;

	thobj = p + thobj_offset;
	thobj->core_offset = thobj_offset;
	thobj->wait_union = __moff(p + tcb_struct_size);
	thobj->wait_size = wait_union_size;

	return p;
}
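
/*
 * Allocation sketch (struct mytask and its wait union are
 * hypothetical, for illustration only): a skin embedding a threadobj
 * into its own TCB would typically allocate it as
 *
 *	struct mytask {
 *		struct threadobj thobj;
 *		... skin-specific fields ...
 *	};
 *
 *	struct mytask *task = __threadobj_alloc(sizeof(struct mytask),
 *				sizeof(union mytask_wait_union),
 *				offsetof(struct mytask, thobj));
 *
 * so that the TCB, the embedded threadobj and the per-thread wait
 * area all live in a single heap block, with wait_union/wait_size
 * describing the trailing wait area.
 */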

static void set_global_priority(struct threadobj *thobj, int policy,
				const struct sched_param_ex *param_ex)
{
	thobj->schedparam = *param_ex;
	thobj->policy = policy;
	thobj->global_priority = map_priority_corespec(policy, param_ex);
}