Commit eb04f2f0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'core-rcu-for-linus' of...

Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (78 commits)
  Revert "rcu: Decrease memory-barrier usage based on semi-formal proof"
  net,rcu: convert call_rcu(prl_entry_destroy_rcu) to kfree
  batman,rcu: convert call_rcu(softif_neigh_free_rcu) to kfree_rcu
  batman,rcu: convert call_rcu(neigh_node_free_rcu) to kfree()
  batman,rcu: convert call_rcu(gw_node_free_rcu) to kfree_rcu
  net,rcu: convert call_rcu(kfree_tid_tx) to kfree_rcu()
  net,rcu: convert call_rcu(xt_osf_finger_free_rcu) to kfree_rcu()
  net/mac80211,rcu: convert call_rcu(work_free_rcu) to kfree_rcu()
  net,rcu: convert call_rcu(wq_free_rcu) to kfree_rcu()
  net,rcu: convert call_rcu(phonet_device_rcu_free) to kfree_rcu()
  perf,rcu: convert call_rcu(swevent_hlist_release_rcu) to kfree_rcu()
  perf,rcu: convert call_rcu(free_ctx) to kfree_rcu()
  net,rcu: convert call_rcu(__nf_ct_ext_free_rcu) to kfree_rcu()
  net,rcu: convert call_rcu(net_generic_release) to kfree_rcu()
  net,rcu: convert call_rcu(netlbl_unlhsh_free_addr6) to kfree_rcu()
  net,rcu: convert call_rcu(netlbl_unlhsh_free_addr4) to kfree_rcu()
  security,rcu: convert call_rcu(sel_netif_free) to kfree_rcu()
  net,rcu: convert call_rcu(xps_dev_maps_release) to kfree_rcu()
  net,rcu: convert call_rcu(xps_map_release) to kfree_rcu()
  net,rcu: convert call_rcu(rps_map_release) to kfree_rcu()
  ...
parents 5765040e 80d02085
......@@ -21,7 +21,7 @@ rcu.txt
RTFP.txt
- List of RCU papers (bibliography) going back to 1980.
stallwarn.txt
- RCU CPU stall warnings (CONFIG_RCU_CPU_STALL_DETECTOR)
- RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
torture.txt
- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
trace.txt
......
Using RCU's CPU Stall Detector
The CONFIG_RCU_CPU_STALL_DETECTOR kernel config parameter enables
RCU's CPU stall detector, which detects conditions that unduly delay
RCU grace periods. The stall detector's idea of what constitutes
"unduly delayed" is controlled by a set of C preprocessor macros:
The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall
detector, which detects conditions that unduly delay RCU grace periods.
This module parameter enables CPU stall detection by default, but
may be overridden via boot-time parameter or at runtime via sysfs.
The stall detector's idea of what constitutes "unduly delayed" is
controlled by a set of kernel configuration variables and cpp macros:
RCU_SECONDS_TILL_STALL_CHECK
CONFIG_RCU_CPU_STALL_TIMEOUT
This macro defines the period of time that RCU will wait from
the beginning of a grace period until it issues an RCU CPU
stall warning. This time period is normally ten seconds.
This kernel configuration parameter defines the period of time
that RCU will wait from the beginning of a grace period until it
issues an RCU CPU stall warning. This time period is normally
ten seconds.
RCU_SECONDS_TILL_STALL_RECHECK
This macro defines the period of time that RCU will wait after
issuing a stall warning until it issues another stall warning
for the same stall. This time period is normally set to thirty
seconds.
for the same stall. This time period is normally set to three
times the check interval plus thirty seconds.
RCU_STALL_RAT_DELAY
......
......@@ -10,34 +10,46 @@ for rcutree and next for rcutiny.
CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
These implementations of RCU provides five debugfs files under the
top-level directory RCU: rcu/rcudata (which displays fields in struct
rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
rcu/rcudata), rcu/rcugp (which displays grace-period counters),
rcu/rcuhier (which displays the struct rcu_node hierarchy), and
rcu/rcu_pending (which displays counts of the reasons that the
rcu_pending() function decided that there was core RCU work to do).
These implementations of RCU provides several debugfs files under the
top-level directory "rcu":
rcu/rcudata:
Displays fields in struct rcu_data.
rcu/rcudata.csv:
Comma-separated values spreadsheet version of rcudata.
rcu/rcugp:
Displays grace-period counters.
rcu/rcuhier:
Displays the struct rcu_node hierarchy.
rcu/rcu_pending:
Displays counts of the reasons rcu_pending() decided that RCU had
work to do.
rcu/rcutorture:
Displays rcutorture test progress.
rcu/rcuboost:
Displays RCU boosting statistics. Only present if
CONFIG_RCU_BOOST=y.
The output of "cat rcu/rcudata" looks as follows:
rcu_sched:
0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
rcu_bh:
0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
The first section lists the rcu_data structures for rcu_sched, the second
for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
......@@ -52,17 +64,18 @@ o The number at the beginning of each line is the CPU number.
substantially larger than the number of actual CPUs.
o "c" is the count of grace periods that this CPU believes have
completed. CPUs in dynticks idle mode may lag quite a ways
behind, for example, CPU 4 under "rcu_sched" above, which has
slept through the past 25 RCU grace periods. It is not unusual
to see CPUs lagging by thousands of grace periods.
completed. Offlined CPUs and CPUs in dynticks idle mode may
lag quite a ways behind, for example, CPU 6 under "rcu_sched"
above, which has been offline through not quite 40,000 RCU grace
periods. It is not unusual to see CPUs lagging by thousands of
grace periods.
o "g" is the count of grace periods that this CPU believes have
started. Again, CPUs in dynticks idle mode may lag behind.
If the "c" and "g" values are equal, this CPU has already
reported a quiescent state for the last RCU grace period that
it is aware of, otherwise, the CPU believes that it owes RCU a
quiescent state.
started. Again, offlined CPUs and CPUs in dynticks idle mode
may lag behind. If the "c" and "g" values are equal, this CPU
has already reported a quiescent state for the last RCU grace
period that it is aware of, otherwise, the CPU believes that it
owes RCU a quiescent state.
o "pq" indicates that this CPU has passed through a quiescent state
for the current grace period. It is possible for "pq" to be
......@@ -81,7 +94,8 @@ o "pqc" indicates which grace period the last-observed quiescent
the next grace period!
o "qp" indicates that RCU still expects a quiescent state from
this CPU.
this CPU. Offlined CPUs and CPUs in dyntick idle mode might
well have qp=1, which is OK: RCU is still ignoring them.
o "dt" is the current value of the dyntick counter that is incremented
when entering or leaving dynticks idle state, either by the
......@@ -108,7 +122,7 @@ o "df" is the number of times that some other CPU has forced a
o "of" is the number of times that some other CPU has forced a
quiescent state on behalf of this CPU due to this CPU being
offline. In a perfect world, this might neve happen, but it
offline. In a perfect world, this might never happen, but it
turns out that offlining and onlining a CPU can take several grace
periods, and so there is likely to be an extended period of time
when RCU believes that the CPU is online when it really is not.
......@@ -125,6 +139,62 @@ o "ql" is the number of RCU callbacks currently residing on
of what state they are in (new, waiting for grace period to
start, waiting for grace period to end, ready to invoke).
o "qs" gives an indication of the state of the callback queue
with four characters:
"N" Indicates that there are callbacks queued that are not
ready to be handled by the next grace period, and thus
will be handled by the grace period following the next
one.
"R" Indicates that there are callbacks queued that are
ready to be handled by the next grace period.
"W" Indicates that there are callbacks queued that are
waiting on the current grace period.
"D" Indicates that there are callbacks queued that have
already been handled by a prior grace period, and are
thus waiting to be invoked. Note that callbacks in
the process of being invoked are not counted here.
Callbacks in the process of being invoked are those
that have been removed from the rcu_data structures
queues by rcu_do_batch(), but which have not yet been
invoked.
If there are no callbacks in a given one of the above states,
the corresponding character is replaced by ".".
o "kt" is the per-CPU kernel-thread state. The digit preceding
the first slash is zero if there is no work pending and 1
otherwise. The character between the first pair of slashes is
as follows:
"S" The kernel thread is stopped, in other words, all
CPUs corresponding to this rcu_node structure are
offline.
"R" The kernel thread is running.
"W" The kernel thread is waiting because there is no work
for it to do.
"O" The kernel thread is waiting because it has been
forced off of its designated CPU or because its
->cpus_allowed mask permits it to run on other than
its designated CPU.
"Y" The kernel thread is yielding to avoid hogging CPU.
"?" Unknown value, indicates a bug.
The number after the final slash is the CPU that the kthread
is actually running on.
o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
the number of times that this CPU's per-CPU kthread has gone
through its loop servicing invoke_rcu_cpu_kthread() requests.
o "b" is the batch limit for this CPU. If more than this number
of RCU callbacks is ready to invoke, then the remainder will
be deferred.
......@@ -174,14 +244,14 @@ o "gpnum" is the number of grace periods that have started. It is
The output of "cat rcu/rcuhier" looks as follows, with very long lines:
c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
1/1 .>. 0:127 ^0
3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
1/1 ..>. 0:127 ^0
3/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3
3/3f ..>. 0:5 ^0 2/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3
rcu_bh:
c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
0/1 .>. 0:127 ^0
0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
0/1 ..>. 0:127 ^0
0/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3
0/3f ..>. 0:5 ^0 0/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3
This is once again split into "rcu_sched" and "rcu_bh" portions,
and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional
......@@ -240,13 +310,20 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
current grace period.
o The characters separated by the ">" indicate the state
of the blocked-tasks lists. A "T" preceding the ">"
of the blocked-tasks lists. A "G" preceding the ">"
indicates that at least one task blocked in an RCU
read-side critical section blocks the current grace
period, while a "." preceding the ">" indicates otherwise.
The character following the ">" indicates similarly for
the next grace period. A "T" should appear in this
field only for rcu-preempt.
period, while a "E" preceding the ">" indicates that
at least one task blocked in an RCU read-side critical
section blocks the current expedited grace period.
A "T" character following the ">" indicates that at
least one task is blocked within an RCU read-side
critical section, regardless of whether any current
grace period (expedited or normal) is inconvenienced.
A "." character appears if the corresponding condition
does not hold, so that "..>." indicates that no tasks
are blocked. In contrast, "GE>T" indicates maximal
inconvenience from blocked tasks.
o The numbers separated by the ":" are the range of CPUs
served by this struct rcu_node. This can be helpful
......@@ -328,6 +405,113 @@ o "nn" is the number of times that this CPU needed nothing. Alert
is due to short-circuit evaluation in rcu_pending().
The output of "cat rcu/rcutorture" looks as follows:
rcutorture test sequence: 0 (test in progress)
rcutorture update version number: 615
The first line shows the number of rcutorture tests that have completed
since boot. If a test is currently running, the "(test in progress)"
string will appear as shown above. The second line shows the number of
update cycles that the current test has started, or zero if there is
no test in progress.
The output of "cat rcu/rcuboost" looks as follows:
0:5 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
balk: nt=0 egt=989 bt=0 nb=0 ny=0 nos=16
6:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f
balk: nt=0 egt=225 bt=0 nb=0 ny=0 nos=6
This information is output only for rcu_preempt. Each two-line entry
corresponds to a leaf rcu_node strcuture. The fields are as follows:
o "n:m" is the CPU-number range for the corresponding two-line
entry. In the sample output above, the first entry covers
CPUs zero through five and the second entry covers CPUs 6
and 7.
o "tasks=TNEB" gives the state of the various segments of the
rnp->blocked_tasks list:
"T" This indicates that there are some tasks that blocked
while running on one of the corresponding CPUs while
in an RCU read-side critical section.
"N" This indicates that some of the blocked tasks are preventing
the current normal (non-expedited) grace period from
completing.
"E" This indicates that some of the blocked tasks are preventing
the current expedited grace period from completing.
"B" This indicates that some of the blocked tasks are in
need of RCU priority boosting.
Each character is replaced with "." if the corresponding
condition does not hold.
o "kt" is the state of the RCU priority-boosting kernel
thread associated with the corresponding rcu_node structure.
The state can be one of the following:
"S" The kernel thread is stopped, in other words, all
CPUs corresponding to this rcu_node structure are
offline.
"R" The kernel thread is running.
"W" The kernel thread is waiting because there is no work
for it to do.
"Y" The kernel thread is yielding to avoid hogging CPU.
"?" Unknown value, indicates a bug.
o "ntb" is the number of tasks boosted.
o "neb" is the number of tasks boosted in order to complete an
expedited grace period.
o "nnb" is the number of tasks boosted in order to complete a
normal (non-expedited) grace period. When boosting a task
that was blocking both an expedited and a normal grace period,
it is counted against the expedited total above.
o "j" is the low-order 16 bits of the jiffies counter in
hexadecimal.
o "bt" is the low-order 16 bits of the value that the jiffies
counter will have when we next start boosting, assuming that
the current grace period does not end beforehand. This is
also in hexadecimal.
o "balk: nt" counts the number of times we didn't boost (in
other words, we balked) even though it was time to boost because
there were no blocked tasks to boost. This situation occurs
when there is one blocked task on one rcu_node structure and
none on some other rcu_node structure.
o "egt" counts the number of times we balked because although
there were blocked tasks, none of them were blocking the
current grace period, whether expedited or otherwise.
o "bt" counts the number of times we balked because boosting
had already been initiated for the current grace period.
o "nb" counts the number of times we balked because there
was at least one task blocking the current non-expedited grace
period that never had blocked. If it is already running, it
just won't help to boost its priority!
o "ny" counts the number of times we balked because it was
not yet time to start boosting.
o "nos" counts the number of times we balked for other
reasons, e.g., the grace period ended first.
CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
These implementations of RCU provides a single debugfs file under the
......@@ -394,9 +578,9 @@ o "neb" is the number of expedited grace periods that have had
o "nnb" is the number of normal grace periods that have had
to resort to RCU priority boosting since boot.
o "j" is the low-order 12 bits of the jiffies counter in hexadecimal.
o "j" is the low-order 16 bits of the jiffies counter in hexadecimal.
o "bt" is the low-order 12 bits of the value that the jiffies counter
o "bt" is the low-order 16 bits of the value that the jiffies counter
will have at the next time that boosting is scheduled to begin.
o In the line beginning with "normal balk", the fields are as follows:
......
......@@ -836,7 +836,6 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
TASKLET: 0 0 0 290
SCHED: 27035 26983 26971 26746
HRTIMER: 0 0 0 0
RCU: 1678 1769 2178 2250
1.3 IDE devices in /proc/ide
......
......@@ -5100,11 +5100,6 @@ int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
return err;
}
static void ring_free_rcu(struct rcu_head *head)
{
kfree(container_of(head, struct ixgbe_ring, rcu));
}
/**
* ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
* @adapter: board private structure to clear interrupt scheme on
......@@ -5126,7 +5121,7 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
/* ixgbe_get_stats64() might access this ring, we must wait
* a grace period before freeing it.
*/
call_rcu(&ring->rcu, ring_free_rcu);
kfree_rcu(ring, rcu);
adapter->rx_ring[i] = NULL;
}
......
......@@ -603,21 +603,13 @@ static int macvlan_port_create(struct net_device *dev)
return err;
}
static void macvlan_port_rcu_free(struct rcu_head *head)
{
struct macvlan_port *port;
port = container_of(head, struct macvlan_port, rcu);
kfree(port);
}
static void macvlan_port_destroy(struct net_device *dev)
{
struct macvlan_port *port = macvlan_port_get(dev);
dev->priv_flags &= ~IFF_MACVLAN_PORT;
netdev_rx_handler_unregister(dev);
call_rcu(&port->rcu, macvlan_port_rcu_free);
kfree_rcu(port, rcu);
}
static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
......
......@@ -414,7 +414,6 @@ enum
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ,
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
NR_SOFTIRQS
};
......
......@@ -47,6 +47,18 @@
extern int rcutorture_runnable; /* for sysctl */
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
extern void rcutorture_record_test_transition(void);
extern void rcutorture_record_progress(unsigned long vernum);
#else
static inline void rcutorture_record_test_transition(void)
{
}
static inline void rcutorture_record_progress(unsigned long vernum)
{
}
#endif
#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))
#define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b))
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
......@@ -68,7 +80,6 @@ extern void call_rcu_sched(struct rcu_head *head,
extern void synchronize_sched(void);
extern void rcu_barrier_bh(void);
extern void rcu_barrier_sched(void);
extern int sched_expedited_torture_stats(char *page);
static inline void __rcu_read_lock_bh(void)
{
......@@ -774,6 +785,7 @@ extern struct debug_obj_descr rcuhead_debug_descr;
static inline void debug_rcu_head_queue(struct rcu_head *head)
{
WARN_ON_ONCE((unsigned long)head & 0x3);
debug_object_activate(head, &rcuhead_debug_descr);
debug_object_active_state(head, &rcuhead_debug_descr,
STATE_RCU_HEAD_READY,
......@@ -797,4 +809,60 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
{
return offset < 4096;
}
static __always_inline
void __kfree_rcu(struct rcu_head *head, unsigned long offset)
{
typedef void (*rcu_callback)(struct rcu_head *);
BUILD_BUG_ON(!__builtin_constant_p(offset));
/* See the kfree_rcu() header comment. */
BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
call_rcu(head, (rcu_callback)offset);
}
extern void kfree(const void *);
static inline void __rcu_reclaim(struct rcu_head *head)
{
unsigned long offset = (unsigned long)head->func;
if (__is_kfree_rcu_offset(offset))
kfree((void *)head - offset);
else
head->func(head);
}
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree
* @rcu_head: the name of the struct rcu_head within the type of @ptr.
*
* Many rcu callbacks functions just call kfree() on the base structure.
* These functions are trivial, but their size adds up, and furthermore
* when they are used in a kernel module, that module must invoke the
* high-latency rcu_barrier() function at module-unload time.
*
* The kfree_rcu() function handles this issue. Rather than encoding a
* function address in the embedded rcu_head structure, kfree_rcu() instead
* encodes the offset of the rcu_head structure within the base structure.
* Because the functions are not allowed in the low-order 4096 bytes of
* kernel virtual memory, offsets up to 4095 bytes can be accommodated.
* If the offset is larger than 4095 bytes, a compile-time error will
* be generated in __kfree_rcu(). If this error is triggered, you can
* either fall back to use of call_rcu() or rearrange the structure to
* position the rcu_head structure into the first 4096 bytes.
*
* Note that the allowable offset might decrease in the future, for example,
* to allow something like kmem_cache_free_rcu().
*/
#define kfree_rcu(ptr, rcu_head) \
__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
#endif /* __LINUX_RCUPDATE_H */
......@@ -99,6 +99,14 @@ static inline void rcu_note_context_switch(int cpu)
rcu_preempt_note_context_switch();
}
/*
* Take advantage of the fact that there is only one CPU, which
* allows us to ignore virtualization-based context switches.
*/
static inline void rcu_virt_note_context_switch(int cpu)
{
}
/*
* Return the number of grace periods.
*/
......
......@@ -35,6 +35,16 @@ extern void rcu_note_context_switch(int cpu);
extern int rcu_needs_cpu(int cpu);
extern void rcu_cpu_stall_reset(void);
/*
* Note a virtualization-based context switch. This is simply a
* wrapper around rcu_note_context_switch(), which allows TINY_RCU
* to save a few bytes.
*/
static inline void rcu_virt_note_context_switch(int cpu)
{
rcu_note_context_switch(cpu);
}
#ifdef CONFIG_TREE_PREEMPT_RCU
extern void exit_rcu(void);
......@@ -58,9 +68,12 @@ static inline void synchronize_rcu_bh_expedited(void)
extern void rcu_barrier(void);
extern unsigned long rcutorture_testseq;
extern unsigned long rcutorture_vernum;
extern long rcu_batches_completed(void);
extern long rcu_batches_completed_bh(void);
extern long rcu_batches_completed_sched(void);
extern void rcu_force_quiescent_state(void);
extern void rcu_bh_force_quiescent_state(void);
extern void rcu_sched_force_quiescent_state(void);
......
......@@ -115,7 +115,6 @@
* sctp/protocol.c
*/
extern struct sock *sctp_get_ctl_sock(void);
extern void sctp_local_addr_free(struct rcu_head *head);
extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
sctp_scope_t, gfp_t gfp,
int flags);
......
......@@ -20,8 +20,7 @@ struct softirq_action;
softirq_name(BLOCK_IOPOLL), \
softirq_name(TASKLET), \
softirq_name(SCHED), \
softirq_name(HRTIMER), \
softirq_name(RCU))
softirq_name(HRTIMER))
/**
* irq_handler_entry - called immediately before the irq action handler
......
......@@ -485,7 +485,7 @@ config TREE_RCU_TRACE
config RCU_BOOST
bool "Enable RCU priority boosting"
depends on RT_MUTEXES && TINY_PREEMPT_RCU
depends on RT_MUTEXES && PREEMPT_RCU
default n
help
This option boosts the priority of preempted RCU readers that
......
......@@ -326,12 +326,6 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
return &css_set_table[index];
}