[[!meta copyright="Copyright © 2012, 2013, 2014 Free Software Foundation,
Inc."]]

[[!meta license="""[[!toggle id="license" text="GFDL 1.2+"]][[!toggleable
id="license" text="Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version 1.2 or
any later version published by the Free Software Foundation; with no Invariant
Sections, no Front-Cover Texts, and no Back-Cover Texts.  A copy of the license
is included in the section entitled [[GNU Free Documentation
License|/fdl]]."]]"""]]

[[!tag open_issue_libpthread]]

`t/fix_have_kernel_resources`

Address problem mentioned in [[/libpthread]], *Threads' Death*.


# IRC, freenode, #hurd, 2012-08-30

    <braunr> tschwinge: this issue needs more cooperation with the kernel
    <braunr> tschwinge: i.e. the ability to tell the kernel where the stack is,
      so it's unmapped when the thread dies
    <braunr> without requiring another thread to perform this deallocation


## IRC, freenode, #hurd, 2013-05-09

    <bddebian> braunr: Speaking of which, didn't you say you had another "easy"
      task?
    <braunr> bddebian: make a system call that both terminates a thread and
      releases memory
    <braunr> (the memory released being the thread stack)
    <braunr> this way, a thread can completely terminate itself without the
      assistance of a managing thread or deferring work
    <bddebian> braunr: That's "easy" ? :)
    <braunr> bddebian: since it's just a thread_terminate+vm_deallocate, it is
    <braunr> something like thread_terminate_self
    <bddebian> But a syscall not an RPC right?
    <braunr> in hurd terminology, we don't make the distinction
    <braunr> the only real syscalls are mach_msg (obviously) and some to get
      well known port rights
    <braunr> e.g. mach_task_self
    <braunr> everything else should be an RPC but could be a system call for
      performance
    <braunr> since mach was designed to support clusters, it was necessary that
      anything not strictly machine-local was an RPC
    <braunr> and it also helps emulation a lot
    <braunr> so keep doing RPCs :p
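
To see why this needs kernel cooperation, consider what a thread would have to
do to tear itself down using only the existing primitives.  The sketch below
(hypothetical, not taken from any patch) cannot work in either order: releasing
the stack first leaves the thread running on unmapped memory, and terminating
first means the deallocation is never reached.

    #include <mach.h>
    #include <stddef.h>

    /* Hypothetical, broken self-termination using only the existing calls.  */
    void
    broken_self_destroy (void *stack_base, size_t stack_size)
    {
      /* Releasing the stack first leaves this thread running on unmapped
         memory for the rest of the function.  */
      vm_deallocate (mach_task_self (), (vm_address_t) stack_base, stack_size);

      /* Terminating first instead would never return to perform the
         vm_deallocate above, hence the need for a managing thread or for a
         combined call such as the thread_terminate_self suggested here.  */
      thread_terminate (mach_thread_self ());
    }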


## IRC, freenode, #hurd, 2013-05-10

    <braunr> i'm not sure it should only apply to self though
    <braunr> youpi: can we get a quick opinion on this please ?
    <braunr> i've suggested bddebian to work on a new RPC that both terminates
      a thread and releases its stack to help fix libpthread
    <braunr> and initially, i thought of it as operating only on the calling
      thread
    <braunr> do you see any reason to make it work on any thread ?
    <braunr> (e.g. a real thread_terminate + vm_deallocate)
    <braunr> (or any reason not to)
    <youpi> thread stack deallocation is always a burden indeed
    <youpi> I'd tend to think it'd be useful, but perhaps ask the list


## IRC, freenode, #hurd, 2013-06-26

    <braunr> looks like there is a port right leak in libpthread
    <braunr> grmbl, the port leak seems to come from mach_port_destroy being
      buggy :/
    <braunr> hum, apparently we're not the only ones to suffer from port leaks
      wrt mach_port_destroy
    <braunr> ew, libpthread is leaking
    <pinotree> memory or ports?
    <braunr> both
    <pinotree> sounds great ;)
    <braunr> as it is, libpthread doesn't destroy threads
    <braunr> it queues them so they're recycled later
    <braunr> but there is confusion between the thread structure itself and its
      internal resources
    <braunr> i.e. there is pthread_alloc which allocates a thread structure,
      and pthread_create which allocates everything else
    <braunr> but on pthread_exit, nothing is destroyed
    <braunr> when a thread structure is reused, its internal resources are
      replaced by new instances
    <pinotree> oh
    <braunr> it's ok for joinable threads but most of our threads are detached
    <braunr> pinotree: as expected, it's bigger than expected :p
    <braunr> so i won't be able to write a quick fix
    <braunr> the true way to fix this is make it possible for threads to free
      their own resources
    <braunr> let's do that :p
    <braunr> ok, got the new thread termination function, i'll build eglibc
      package providing it, then experiment with libpthread
    <pinotree> braunr: iirc there's also a tschwinge patch in the debian eglibc
      about that
    <braunr> ah
    <pinotree> libpthread_fix.diff
    <braunr> i see
    <braunr> thanks for the notice
    <braunr> bddebian:
      http://www.sceen.net/~rbraun/0001-thread_terminate_deallocate.patch
    <braunr> bddebian: this is what it looks like
    <braunr> see, short and easy
    <bddebian> Aye but didn't youpi say not to bother with it??
    <braunr> he did ?
    <braunr> i don't remember
    <bddebian> I thought that was the implication.  Or maybe that was the one I
      already did!?
    <braunr> i'd be interested in reading that
    <braunr> anyway, there still are problems in libpthread, and this call is
      one building block to fix some of them
    <braunr> some important ones
    <braunr> (big leaks)


## IRC, freenode, #hurd, 2013-06-29

    <braunr> damn, i fix leaks in libpthread, only to find out leaks somewhere
      else :(
    <braunr> bddebian: ok, actually it was a bit more complicated than what i
      showed you
    <braunr> because in addition to the stack, the call must also release the
      send right in the caller's ipc space
    <braunr> (it can't be released before since there would be no mean to
      reference the thread to destroy)
    <braunr> or perhaps it should strictly be reserved to self termination
    <braunr> hmm
    <braunr> yes it would probably be simpler
    <braunr> but it should be a decent compromise
    <braunr> i'm close to having a libpthread that doesn't leak anything
    <braunr> and that properly destroys threads and their resources


## IRC, freenode, #hurd, 2013-06-30

    <braunr> bddebian: ok, it was even more tricky, because the kernel would
      save the return value on the user stack (which is released by the call
      and then invalid) before checking for asynchronous software traps (ASTs,
      a kind of software interrupts in mach), and terminating the calling
      thread is done by a deferred AST ... :)
    <braunr> hmm, making threads able to terminate themselves makes rpctrace a
      bit useless :/
    <braunr> well, more restricted

    <braunr> ok so, tough question :
    <braunr> i have a small test program that creates a thread, and inspect its
      state before any thread dies
    <braunr> i can see msg_report_wait requests when using ps
    <braunr> (one per thread)
    <braunr> one of these requests create a new receive right, apparently for
      the second thread in the test program
    <braunr> each time i use ps, i can see the sequence numbers of two receive
      rights increase
    <braunr> i guess these rights are related to proc and signal handling per
      thread
    <braunr> but i can't find what create them
    <braunr> does anyone know ?
    <braunr> tschwing_: ^ :)

    <braunr> again, too many things wrong elsewhere to cleanly destroy threads
      ..
    <braunr> something is deeply wrong with controlling terminals ..


## IRC, freenode, #hurd, 2013-07-01

    <braunr> youpi: if you happen to notice what receive right is created for
      each thread (beyond the obvious port used for blocking and waking up),
      please let me know
    <braunr> it's the only port leak i have with thread destruction
    <braunr> and i think it's related to the proc server since i see the
      sequence number increase every time i use ps

    <braunr> pinotree: my change doesn't fix all the pthread leaks but it's a
      lot better
    <braunr> bddebian: i've spent almost the whole week end trying to find the
      last port leak without success
    <braunr> there is some weird bug related to the controlling tty that hits
      me every time i try to change something
    <braunr> it's the same bug that prevents ttys from being correctly closed
      when using ssh or screen
    <braunr> well maybe not the same, but it's close
    <braunr> some stale receive right kept around for no apparent reason
    <braunr> and i can't find its source


## IRC, freenode, #hurd, 2013-07-02

    <braunr> and btw, i don't think i can make my libpthread patch work
    <braunr> i'll just aim at avoiding leaks, but destroying threads and their
      related resources depends on other changes i don't clearly see


## IRC, freenode, #hurd, 2013-07-03

    <braunr> grmbl, i don't want to give up thread destruction ..


## IRC, freenode, #hurd, 2013-07-15

    <braunr> btw, my work on thread destruction is currently stalled
    <braunr> i don't have much free time right now


## IRC, freenode, #hurd, 2013-09-13

    <braunr> i think i know why my thread_terminate_deallocate patches leak one
      receive port :>
    <braunr> but now i'm not sure of the proper solution
    <braunr> every time a thread is created and destroyed, a receive right is
      leaked
    <braunr> i guess it's simply the reply port ..
    <braunr> grmbl
    <braunr> i guess i have to make it a simpleroutine ...
    <braunr> hm too bad, it's not the reply port :(
    <braunr> it's also leaking some memory
    <braunr> it doesn't seem related to my changes though
    <braunr> stacks, rights, and threads are correctly destroyed
    <braunr> some obscure state is left behind
    <braunr> i wonder how exception ports are dealt with
    <braunr> vminfo seems to confirm memory is leaking in the heap
    <braunr> humpf
    <braunr> oh silly me
    <braunr> i don't detach threads
    <teythoon> well, detach them ;)
    <braunr> hm worse :p
    <braunr> now i get additional dead names
    <braunr> but it's a step forward
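
The missing detach above is ordinary pthread semantics: a thread that is
neither detached nor joined keeps its resources reserved for a possible later
join.  A minimal illustration (plain pthread code, not from libpthread):

    #include <pthread.h>

    static void *
    worker (void *arg)
    {
      return arg;
    }

    int
    main (void)
    {
      pthread_t tid;

      pthread_create (&tid, NULL, worker, NULL);
      /* Without this (or a pthread_join, or creating the thread with the
         PTHREAD_CREATE_DETACHED attribute), the terminated thread's state is
         never reclaimed.  */
      pthread_detach (tid);
      pthread_exit (NULL);
    }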


## IRC, freenode, #hurd, 2013-09-16

    <braunr> that thread port leak is so strange
    <braunr> the leaked port seems to be created when the new thread starts
      running
    <braunr> so it looks like a port the kernel would implicitly create
    <braunr> hm could it be a thread-specific reply port ?
    <youpi> ah, yes, there is one of those
    <braunr> how come mach/mig-reply.c in glibc isn't thread-safe ?
    <youpi> it is overridden by sysdeps/mach/hurd/mig-reply.c I guess
    <youpi> which uses a threadvar for the mig reply port
    <braunr> oh
    <youpi> talking of which, there is also last_value in
      sysdeps/mach/strerror_l.c
    <youpi> strerror_thread_freeres is supposed to get called, but who knows
    <braunr> it does look to be that port
    <youpi> iirc that's the issue which prevents from letting us make threads
      exit on idleness?
    <braunr> one of them
    <youpi> ok
    <braunr> maybe the only one, yes
    <braunr> i see memory leaks but they could be related/normal
    <braunr> (i.e. not actual leaks)
    <braunr> on the other hand, i also can't boot a hurd with my patch
    <braunr> but i consider removing such leaks a priority
    <braunr> does anyone know the semantic difference between
      __mig_put_reply_port and __mig_dealloc_reply_port ?
    <braunr> i guess __mig_dealloc_reply_port is actually a destruction
      operation, right ?
    <youpi> AIUI, dealloc is used when one wants the port not to be reused at
      all
    <youpi> because it has been used as a reference for something, and can
      still be currently in use
    <youpi> while put_reply would be when we're really done with it, and won't
      use it again, and can thus be used as such
    <youpi> or at least something like that
    <braunr> heh
    <braunr> __mig_dealloc_reply_port calls __mach_port_mod_refs, which is a
      RPC, and creates a new reply port when destroying the current one
    <youpi> bah
    <youpi> that's fine, it's a deref of the old port, which is not in the
      reply_port variable any more
    <braunr> it's fine, but still a leak
    <youpi> well, dealloc does not completely deallocs, yes
    <braunr> that's not really the problem here
    <braunr> i've introduced a case that wasn't considered at the time, namely
      that a thread can destroy itself
    <youpi> we probably need another function to be called from the thread exit
    <braunr> i'll simply try with mach_port_destroy
    <braunr> mach_port_destroy seems to be a RPC too ...
    <braunr> grmbl
    <youpi> isn't there a trap version somehow ?
    <braunr> not in libc
    <youpi> erf
    <braunr> at least i know what's wrong now :)
    <braunr> there still is a small memory leak i have to investigate
    <braunr> but outside the stack
    <braunr> the stack, the thread name and the thread are correctly destroyed
    <braunr> slabinfo confirms only one port leak and nothing else is leaked
    <braunr> ok so the port leak was indeed the thread-specific reply port,
      taken care of
    <braunr> there are also memory leaks too
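
The right that leaks here is the thread-specific MIG reply port, a receive
right that glibc creates lazily for RPC replies.  The general pattern for
really dropping a receive right (as opposed to what `__mig_dealloc_reply_port`
does, i.e. dereferencing the old port and creating a new one) is sketched
below; this is not glibc's actual code:

    #include <mach.h>

    /* Drop a thread's MIG reply port for good (sketch).  Note that
       mach_port_mod_refs is itself an RPC, which is part of the difficulty
       for a thread that is about to terminate itself.  */
    void
    release_reply_port (mach_port_t reply_port)
    {
      if (MACH_PORT_VALID (reply_port))
        mach_port_mod_refs (mach_task_self (), reply_port,
                            MACH_PORT_RIGHT_RECEIVE, -1);
    }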


## IRC, freenode, #hurd, 2013-09-17

    <braunr> teythoon: on my side, i'm getting to know our threading
      implementation better
    <braunr> closing to clean thread destruction
    <braunr> x15 ipc will hide reply ports ;p
    <braunr> memory leaks solved \o/
    <braunr> now, have to fix memory release when joining
    <braunr> proper reference counting on detach/join/exit, let's see how it
      goes ..
    <braunr> seems to work fine


## IRC, freenode, #hurd, 2013-09-18

    <braunr> ok i'll soon have gnumach and libc packages including proper
      thread destruction :>
    <teythoon> braunr: why did you have to touch gnumach?
    <braunr> to add a call allowing threads to release ports and memory
    <braunr> i.e. their last self reference, their reply port and their stack
    <braunr> let me publish my current patches
    <teythoon> braunr: thread_commit_suicide ?
    <braunr> hehe
    <braunr> initially thread_terminate_self but
    <braunr> it can be used by other threads too
    <braunr> so i named it thread_terminate_release
    <braunr> http://darnassus.sceen.net/~rbraun/0001-pthread_thread_halt.patch
    <braunr>
      http://darnassus.sceen.net/~rbraun/0001-thread_terminate_release.patch
    <braunr> the pthread patch needs to be polished because it changes the
      semantics of pthread_thread_halt
    <braunr> but other than that, it should be complete
    <pinotree> pthread_thread_halt_reallyhalt
    <braunr> ok let's try these libc packages
    <braunr> old static ext2fs for the root, but other than that, it boots
    <braunr> let's try iceweasel
    <braunr> (i'll need to build a hurd package against this new libc, removing
      the libports_stability patch which prevents thread destruction in servers
      on the way)
    <teythoon> prevents thread destruction o_O
    <braunr> yes
    <braunr> in libports only ;p
    <teythoon> oh, *only* in libports, I assumed for a moment that it affected
      almost every component of the Hurd...
    <teythoon> *phew(
    <braunr> ... :)
    <braunr> that's why, after a burst of messages, say because of aptitude
      (select), you may see a few hundred threads still hanging around
    <braunr> also why unused servers remain running even after several minutes,
      where the normal timeout is 2mins
    <teythoon> I wondered about that, some servers (symlink comes to mind) seem
      to go away if unused (or that's how I read the code)
    <braunr> symlinks are usually not servers, since most of them actually
      exist in file systems, and are implemented through an optimization
    <teythoon> yes I know that
    <teythoon> trans/symlink.c reads:
    <teythoon>       /* The timeout here is 10 minutes */
    <teythoon>       err = mach_msg_server_timeout (fsys_server, 0, control,
    <teythoon> 				     MACH_RCV_TIMEOUT, 1000 * 60 * 10);
    <teythoon>       if (err == MACH_RCV_TIMED_OUT)
    <teythoon> 	exit (0);
    <braunr> ok
    <teythoon> hm, /hurd/symlink doesn't feel at all like a symlink... but
      works like one
    <braunr> well, starting iceweasel makes X on my host freeze oO
    <braunr> bbl
    <teythoon> /hurd/symlink translators do go away after being unused for 10
      minutes... this is funny if they are set up by hand instead of being
      started from a passive translator record
    <teythoon> magically vanishing symlinks ;)
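
For reference, an illustrative C prototype of the call described above, which
lets a thread release in one kernel operation everything it cannot release on
its own; see the linked patches for the interface actually proposed for
gnumach:

    #include <mach.h>

    /* Illustrative prototype only.  Terminates THREAD and, in the same
       kernel operation, deallocates the port names THREAD_NAME (the thread's
       last self reference) and REPLY_PORT in TASK's IPC space, and unmaps
       SIZE bytes of stack starting at ADDRESS in TASK's address space.  */
    kern_return_t thread_terminate_release (thread_t thread,
                                            task_t task,
                                            mach_port_t thread_name,
                                            mach_port_t reply_port,
                                            vm_address_t address,
                                            vm_size_t size);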


## IRC, freenode, #hurd, 2013-09-19

    <braunr> hum, i can't rebuild a hurd package :(
    <teythoon> braunr: with your thread destruction patches in libc?
    <braunr> yes but it's unrelated
    <braunr> In file included from ../../libdiskfs/boot-start.c:38:0:
    <braunr> ./fsys_reply_U.h:173:15: error: conflicting types for
      ‘fsys_get_children’
    <braunr> i didn't see a new libc debian release
    <teythoon> hm, David reported that as well
    <teythoon>
      id:CAEvUa7=QzOiS41G5Vq8k4AiaN10jAPm+CL_205OHJnL0xpJXbw@mail.gmail.com
    <teythoon> uh oh
    <teythoon> it seems I didn't add a _reply suffix to the reply routines :/
    <teythoon> there's quite a bit of fallout from my patches, I kinda feel bad
      :(
    <braunr> teythoon: what i'm wondering is what youpi did too, since he got
      hurd binary packages
    <teythoon> braunr: well neither he nor I noticed that b/c for us the
      declarations were just missing
    <braunr> from libc you mean ?
    <braunr> or hum gnumach-common ?
    <teythoon> not sure actually
    <braunr> no it's not a gnumach thing
    <braunr> hurd-dev then
    <teythoon> the build system should have cought these, or mig...
    <braunr> also, i see you changed fsys_reply.defs, but nothing about
      fsys_request.defs
    <teythoon> I have no fsys_requests.defs
    <braunr> looks like there was no fsys_request.defs in the first place
      ... *sigh*
    <braunr> do you know an application that often creates and destroys threads
      ?
    <teythoon> no, sorry
    <pinotree> maybe some test suite
    <braunr> ah right
    <braunr> sysbench maybe
    <braunr> also, i've been hit by a lot more network deadlocks than usual
      lately
    <braunr> fixing netdde has gained some priority in my todo list


## IRC, freenode, #hurd, 2013-09-20

    <braunr> oh, git is multithreaded
    <braunr> great
    <braunr> so i've actually tested my libpthread patch quite a lot


## IRC, freenode, #hurd, 2013-09-25

    <braunr> on a side note, i was able to build gnumach/libc/hurd packages
      with thread destruction
    <teythoon> nice :)
    <braunr> they boot and work mostly fine, although they add their own issues
    <braunr> e.g. the comm field of the root ext2fs is empty
    <braunr> ps crashes when trying to display threads
    <braunr> but thread destruction actually works, i.e. servers (those that
      are configured that away at least) go away after some time, and even
      heavily used servers such as ext2fs dynamically scale over time :)


## IRC, freenode, #hurd, 2013-10-10

    <braunr> concerning threads, i think i figured out the last bugs i had with
      thread destruction
    <braunr> it should be well on its way to be merged by the end of the year


## IRC, freenode, #hurd, 2013-10-11

    <gg0> braunr: is your thread destruction patch ready for testing?
    <braunr> gg0: there are packages at my repository, yes
    <braunr> but i still have hurd fixes to do before i polish it
    <braunr> in particular, posix says returning from main() stops the entire
      process and all other threads
    <braunr> i didn't check that during the switch to pthreads, and ext2fs (and
      maybe others) actually return from main but expect other threads to live
      on
    <braunr> this creates problems when the main thread is actually destroyed,
      but not the process
    <teythoon> braunr: tmpfs does something like that, but calls pthread_exit
      at the end of main
    <braunr> same effect
    <braunr> this was fine with cthreads, but must be changed with pthreads
    <braunr> and libpthread must be fixed to enforce it
    <braunr> (or libc)

    <braunr> diskfs_startup_diskfs should probably be changed to reuse the main
      thread instead of returning


## IRC, freenode, #hurd, 2013-10-19

    <zacts> I know what threads are, but what is 'thread destruction'?
    <braunr> the hurd currently never destroys individual threads
    <braunr> they're destroyed when tasks are destroyed
    <braunr> if the number of threads in a task peaks at a high number, say
      thousands of them, they'll remain until the task is terminated
    <braunr> such tasks are usually file systems, normally never restarted (and
      in the case of the root file system, not restartable)
    <braunr> this results in a form of leak
    <braunr> another effect of this leak is that servers which should go away
      because of inactivity still remain
    <braunr> since thread destruction doesn't actually work, the debian package
      uses a patch to prevent worker threads from timeouting
    <braunr> and to finish with, since thread destruction actually doesn't
      work, normal (unpatched) applications that destroy threads are certainly
      failing bad
    <braunr> i just need to polish a few things, wait for youpi to finish his
      work on TLS to resolve conflicts, and that will be all


## IRC, freenode, #hurd, 2013-10-30

    <braunr> FYI, the packages on my repository enable actual thread
      destruction, and i've altered the libports_stability.patch
    <braunr> it now only sets the global timeout to 0
    <braunr> we actually can't let translator "die" on global timeout because
      of a race issue
    <braunr> tested for about two weeks now and no major problem sighted
    <braunr> top reports processes running for 100% of their time when
      terminating threads, but i expect it's simply mach/proc aggregating their
      run time to the task
    <braunr> 100% of cpu time
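
For context, these timeouts are the ones Hurd servers pass to libports.  A
typical (simplified) call is sketched below, with `my_demuxer` standing in for
the server's real message demuxer and the timeout values purely illustrative:
idle worker threads exit after the thread timeout, which only reclaims
anything once thread destruction works, and the whole translator exits after
the global timeout, which the altered libports_stability patch sets to 0
(disabled) because of the race mentioned above.

    #include <mach.h>
    #include <hurd/ports.h>

    /* my_demuxer is a placeholder for the server's message demuxer.  */
    extern int my_demuxer (mach_msg_header_t *inp, mach_msg_header_t *outp);

    void
    serve_requests (struct port_bucket *bucket)
    {
      ports_manage_port_operations_multithread
        (bucket, my_demuxer,
         2 * 60 * 1000,   /* thread timeout: 2 minutes of inactivity */
         10 * 60 * 1000,  /* global timeout: 10 minutes without clients */
         NULL);
    }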


## IRC, freenode, #hurd, 2013-11-08

    <braunr> teythoon: darnassus is currently running a modified glibc with
      thread destruction, yes
    <teythoon> braunr: did that require any fixups in Hurd that I'd have missed
      ?
    <braunr> no
    <braunr> well
    <teythoon> b/c the resulting hurd package would not boot
    <braunr> actually yes
    <braunr> one
    <braunr> i'll push the patch somewhere
    <teythoon> iirc the mach-defpager spewed some error and /hurd/init failed
      to bootstrap the system
    <braunr> teythoon:
      http://darnassus.sceen.net/~rbraun/0001-Prevent-diskfs-translators-from-destroying-main-thre.patch
    <braunr> make sure you have the proper gnumach packages too :p
    <teythoon> well, that could very well account for my trouble ;)
    <teythoon> uh
    <teythoon> well
    <braunr> gnumach implements thread destruction, glibc uses it, hurd makes
      sure it doesn't exit from main


## IRC, freenode, #hurd, 2013-11-12

    <braunr> ok so, calling pthread_exit() from main isn't the same as
      returning from main()
    <braunr> unlike what some man pages seem to say
    <braunr> so losing task info when destroying the main thread is actually a
      proc bug
    <braunr> ugh
    <teythoon> ^^
    <braunr> or a glibc one
    <teythoon> the proc server, your favorite Hurd component...
    <braunr> :)
    <braunr> hm :/
    <braunr> looks like command line arguments are stored on the stack of the
      main thread
    <braunr> and proc merely receives the addresses of those in the target task
    <neal> why not just keep the main thread around?
    <neal> it represents a minor resource leak, true
    <braunr> yes
    <braunr> that's the hack i suggested
    <neal> but it is relatively small
    <braunr> well no
    <braunr> my hack was about diskfs translators
    <braunr> it should be generalized in libpthread
    <braunr> seems reasonable
    <braunr> let's do it >)


## IRC, freenode, #hurd, 2013-11-13

    <youpi> braunr: there is a thread destruction issue in the experimental
      ocaml build, worth looking at, probably
    <braunr> what do you mean ?
    <youpi> ... testing 'testfork.ml': ocamlcocamlrun:
      ../libpthread/sysdeps/mach/pt-thread-halt.c:51: __pthread_thread_halt:
      Unexpected error: (ipc/send) invalid destination port.
    <youpi> during the experimental ocaml build
    <braunr> well yes
    <braunr> thread recycling is buggy
    <braunr> i had the choice to fix it, or implement true destruction
    <braunr> i'm tweaking my patch so it leaves the main thread stack untouched
      on destruction
    <braunr> and it should be ready
    <braunr> for review at least


## IRC, OFTC, #debian-hurd, 2013-11-13

    <gg0> ironforge out of memory during ruby1.9.1 rebuild. during test which
      creates 10000 threads
    <gg0> ironforge out of memory during ruby1.9.1 rebuild, test which creates
      10000 threads
    <gg0> i guess ironforge kernel has been rebuilt against -95, correct?
    <youpi> err, what kernel?
    <gg0> 23:37 < youpi> hurd needs a rebuild to be able to work with the newer
      eglibc
    <gg0> i mean hurd
    <youpi> yes, libc0.3 breaks the old packages anyway
    <gg0> wrt ENOMEM, was it expected?
    <gg0> wrt disk problems, aren't there on alioth only?
    <youpi> well 10,000 threads is a lot, especially on 32bit machine with 2M
      default stack  size
    <youpi> that makes 2GiB stacks
    <youpi> can't fit in a 2/2 split model, which gnumach uses
    <gg0> well, though active thread should die right away, just after set x to
      false, if i read it correctly
    <youpi> perhaps the stacks are not correctly reused
    <youpi> that's probably worth digging in libpthread
    <youpi> by putting printfs, etc.
    <youpi> it seems stacks are never reused indeed, damn
    <youpi> I just wrote a small test that creates threads which just print
      their stack address
    <youpi> that takes just a few minutes to do
    <gg0> i see. about reusage i guess you mean base address is kindof always
      incremented
    * gg0 likes being wrong
    <youpi> that's it, yes
    <youpi> gg0: take care, by keeping being wrong all the time, sometimes you
      get right ;)
    <youpi> and you are definitely right here :)
    <youpi> Mmm, but the stack is really deallocated
    <youpi> and the numbers wrap around
    <youpi> I wonder how that is :)
    <youpi> ok, creating 20 000 threads does work
    <youpi> perhaps ruby does odd things which makes it not work


### IRC, OFTC, #debian-hurd, 2013-11-14

    <gg0>  UID   PID  PPID TH  MSGI  MSGO    SZ   RSS SC STAT     TIME COMMAND
    <gg0> 1012 16446 15473 720  987   509 1.89G 23.6M  1 Hu    0:00.15
      /home/gg0-guest/ruby/ruby1.9.git/ruby1.9.1
      -I/home/gg0-guest/ruby/ruby1.9.git/lib -W0 bootstraptest.tmp.rb
    <gg0> 720 threads, stuck
    <youpi> 2G SZ is very big :)
    <gg0> 00:42 < youpi> perhaps ruby does odd things which makes it not work
    <gg0> is that enough to file a ruby bug? as ruby suggests itself btw
    <youpi> no, they will probably not be able to investigate
    <youpi> but you can already check out how they create threads
    <youpi> and try to reproduce the same with a small C program
    <gg0> ehm on ruby2.0 with *context _enabled_ i can not reproduce it

See [[/open_issues/glibc]] for `*context` functions.


## IRC, freenode, #hurd, 2013-11-14

    <braunr> nice, i got glibc packages with thread destruction
    <braunr> building hurd packages against it now
    <braunr> everything seems fine
    <braunr> hurd packages ready, let's see

    <gg0> ruby1.9.1 FTBFS due to a couple of tests
      https://buildd.debian.org/status/fetch.php?pkg=ruby1.9.1&arch=hurd-i386&ver=1.9.3.448-1&stamp=1384265526
    <gg0> second one creates 10000 threads and machine got ENOMEM
    <braunr> bootstraptest.tmp.rb: [BUG] [BUG] pthread_cond_init: Cannot
      allocate memory (ENOMEM) ew
    <gg0> few hours ago trying to reproduce it:
    <gg0> 01:20 < gg0>  UID   PID  PPID TH  MSGI  MSGO    SZ   RSS SC STAT
      TIME COMMAND
    <gg0> 01:20 < gg0> 1012 16446 15473 720  987   509 1.89G 23.6M  1 Hu
      0:00.15 /home/gg0-guest/ruby/ruby1.9.git/ruby1.9.1
      -I/home/gg0-guest/ruby/ruby1.9.git/lib -W0 bootstraptest.tmp.rb
    <braunr> yes that's expected
    <braunr> our stacks are 2M
    <braunr> 10k threads means right over 2G of stacks
    <braunr> userspace is restricted to 2G
    <gg0> but if i read correctly test in question, thread should just set x to
      false then die
    <braunr> so ?
    <gg0> and ENOMEM popped up when the thread count was at 720
    <braunr> hum
    <braunr> 10k threads would actually be 20G
    <braunr> 1k threads is 2G
    <braunr> 720 is about 1.5G
    <braunr> the rest is probably the ruby runtime
    <gg0> youpi tried to create 10000 thread, no problem. he guessed something
      wrong on ruby side
    <gg0> indeed on ruby2.0 such test succeeds
    <braunr> you can't create 10k threads unless you change the stack size
    <braunr> hurd servers use a stack size of 64k by default which allows them
      to go up to 30k iirc
    <braunr> but normal applications use the default 2M
    <gg0> i guess you mean 10000 threads active at the same time. test in
      question should make them die after simply setting x to false, i guess
      youpi's test did so as well
    <braunr> no
    <braunr> it's about stacks
    <braunr> hm
    <braunr> yes at the same time but
    <braunr> thread recycling is known to be buggy
    <braunr> which is what i'm currently fixing btw
    <neal> what's the bug?
    <braunr> neal: there are several subtle issues
    <braunr> for example, joining a thread that is also calling pthread_exit
      can fail badly
    <neal> hmm
    <neal> good that you are on it then :)
    <braunr> or detaching
    <braunr> i don't remember the details
    <braunr> but i remember such problems
    <braunr> apparently, keeping the stack of the main thread isn't enough
    <braunr> :(
    <braunr> for now, i'll keep the entire thread
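
The arithmetic is easy to check: at the default 2 MiB per stack, about a
thousand concurrent threads already exhaust the 2 GiB of user address space
available under gnumach's 2/2 split, and 10,000 would need roughly 20 GiB.  A
small hypothetical test makes the limit visible and shows how a smaller stack
size, such as the 64 KiB used by Hurd servers, pushes it much higher:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static void *
    worker (void *arg)
    {
      (void) arg;
      sleep (60);                   /* keep the stack in use for a while */
      return NULL;
    }

    int
    main (void)
    {
      pthread_attr_t attr;
      pthread_t tid;
      int n = 0;

      pthread_attr_init (&attr);
      /* Compare the default 2 MiB with 64 * 1024 here.  */
      pthread_attr_setstacksize (&attr, 2 * 1024 * 1024);
      pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
      while (pthread_create (&tid, &attr, worker, NULL) == 0)
        n++;
      printf ("created %d concurrent threads before failure\n", n);
      pthread_attr_destroy (&attr);
      return 0;
    }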


## IRC, freenode, #hurd, 2013-11-15

    <gg0> i wasn't doing anything, just some single test runs. but yes, also
      that one which creates hundreds of threads
    <gg0> it would like creating 10000 but goes out of memory after ~720
    <gg0> btw same tests succeed on ruby2.0, so they should be fixed by
      backporting some changes
    <braunr> actually it looks more like a deadlock ..
    <gg0> deadlock that says ENOMEM?
    <braunr> ?
    <braunr> ENOMEM is returned because the test task has no more virtual
      memory
    <braunr> this doesn't mean the rest of the system should fail
    <gg0> ok i thought you were talking about such test
    <braunr> no it's something else
    <braunr> a deadlock in a critical server
    <braunr> the root file system maybe
    <gg0> braunr: htop and ps hang. just run the test once again
    <gg0> now you should still be able to login
    <braunr> htop/ps hanging means one process is unable to reply to queries
      sent to the message port/thread
    <braunr> procfs does that to report on what a process is waiting
    <braunr> it usually mean there is a bug around signals, since the message
      thread is also in charge of delivering signals
    <braunr> use ps -eM
    <braunr> and kill -KILL
    <braunr> hum
    <braunr> root       954 S<o   0:00.05 /hurd/crash --dump-core
    <braunr> dumping cores is known not to work most of the time
    <braunr> exodar shouldn't be configured like that
    <braunr> so yes, the crash server is hanging
    <braunr> gg0: i've set it to crash --kill and killed the hanging crash
      instances blocking top/ps
    <gg0> nice

    <braunr> my thread destruction patch and tls are indeed conflicting a bit
    <braunr> i suspect the tcb is used after being freed
    <braunr> i think i'll simply recycle the tcb, along with the pthread
      structs
    <braunr> ok i think it's fine now
    <braunr> there was also a small bug in the tls code, keeping a reference on
      the thread port
    <braunr> mach reference counting is so counter intuitive :/
    <braunr> well, error-prone

    <braunr> argh, more bugs in libc :(
    <teythoon> :/
    <teythoon> but don't worry, there is always one more bug ;)
    <braunr> this one might explain crashes that are long to trigger
    <braunr> _hurd_self_sigstate() is implemented like this :
      _hurd_thread_sigstate (__mach_thread_self ());
    <braunr> it leaks a reference on the current thread each time it's called
    <teythoon> >,<
    <braunr> but glibc maintains such references, so if the maximum value is
      reached, and references are dropped, the value can reach 0
    <teythoon> ouch
    <braunr> at which point any call on a thread will result in an invalid send
      right
    <braunr> and probably an assertion
    <teythoon> well it's a good thing then that you found it :)
    <braunr> i think it's always been there
    <braunr> but it's more apparent since jkoenig's patch on signal
      dispositions
    <braunr> the maximum number of user references in mach is 64k
    <braunr> this right leak isn't easy
    <braunr> tls is very tricky heh :)
    <braunr> for the main thread, tls initialization happens after the thread
      creation, obviously
    <braunr> but for other threads, it's initialized before starting them
    <braunr> the leak was probably an overlook caused by that complexity
    <braunr> teythoon: actually that leak i mentioned in _hurd_self_sigstate
      has only been recently added in Convert sigstate to TLS
    <braunr> so it's merely tls integration polishing
    <braunr> youpi: i'm currently reviewing changes related to tls and i think
      there is a bug in _hurd_self_sigstate
    <braunr> calls to mach_thread_self() should be paired with
      mach_port_deallocate to avoid urefs overflows
    <braunr> and right leaks
    <braunr> _hurd_critical_section_lock is probably affected too
    <braunr> hm
    <braunr> mhmm
    <braunr> in glibc, hurd/hurd/signal.h, _hurd_critical_section_lock
    <braunr> why is the sigstate unlocked after the call to
      _hurd_thread_sigstate
    <braunr> _hurd_thread_sigstate doesn't seem to lock it ..
    <braunr> unless __spin_lock_init does it
    <braunr> yes, leak solved :)
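
The discipline braunr describes is the usual one for Mach port references:
mach_thread_self() returns a new user reference on the calling thread's send
right every time, so each call must be balanced with a mach_port_deallocate()
or the reference count creeps towards the 64k maximum.  A sketch of the
pattern (not glibc's actual code):

    #include <mach.h>

    void
    use_self_thread (void)
    {
      thread_t self = mach_thread_self ();   /* takes a user reference */

      /* ... pass SELF to whatever RPC needs it ... */

      mach_port_deallocate (mach_task_self (), self);   /* give it back */
    }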


## IRC, freenode, #hurd, 2013-11-16

    <braunr> argh, _hurd_critical_section_lock is called before the send right
      on the main thread is fetched in libpthread :/
    <teythoon> is that bad ?
    <braunr> the sigstate is supposed to be initialized after pthreads
    <braunr> _hurd_critical_section_lock will create it if it sees there is
      none
    <braunr> creating the sigstate is currently what makes the send right leak
    <teythoon> ok
    <teythoon> it's bad then
    <braunr> it may be due to my patch
    <braunr> _hurd_critical_section_lock is called during pthreads
      initialization
    <braunr> before the sigstate for the main thread is created, but after the
      pthread init routine is called
    <braunr> it does indeed look like the code wasn't written with thread being
      destroyed some day in mind :/
    <teythoon> braunr: btw, if you ever feel like benchmarking, sysbench has a
      benchmark for threads contending for a lock
    <braunr> yes i've used it before
    <teythoon> was it useful for this purpose ?
    <braunr> no :)
    <teythoon> :/
    <braunr> we already know libpthread isn't optimized
    <braunr> and felt it when we switched from cthreads
    <braunr> humpf
    <braunr> simply calling malloc implies a call to
      _hurd_critical_section_lock
    <braunr> on the other hand, unlike what some glibc comments say, this does
      work


## IRC, freenode, #hurd, 2013-11-17

    <braunr> looks like i've fixed all leak issues with thread destruction and
      tls :)
    <braunr> let's see if ext2fs.static works fine too
    <youpi> braunr: \o/
    <youpi> sorry about introducing the tls ones :)
    <braunr> no worries, it was expected
    <braunr> and tls was really needed :)
    <braunr> i mean, i expected to have some problems when rebasing on tls :p
    <teythoon> braunr: this is good news, how is your rootfs translator holding
      up?
    <braunr> building hurd packages right now
    <braunr> for now, only test applications and a few really multithreaded
      ones (e.g. iceweasel) have been tested
    <braunr> well, the system boots :)
    <teythoon> awesome :)
    <braunr> stressing the file system with git while watching youtube videos
      with gnash doesn't make the system crash
    <teythoon> you can actually watch yt videos on your Hurd box ?
    <braunr> yes
    <braunr> for a while now
    <teythoon> o_O
    <braunr> can't you ?
    <teythoon> I never even dared to try
    <braunr> hehe
    <braunr> teythoon: looks stable enough to install on darnassus


## IRC, freenode, #hurd, 2013-11-18

    <teythoon> braunr: wrt to your thread destruction patchset, I thought you
      also had to fix the proc server ?
    <braunr> teythoon: no
    <braunr> the problem was in glibc
    <braunr> i may have to fix proc/procfs though, because cpu time gets wrong
      with the patch
    <braunr> currently, it's the addition of the cpu time of all threads
    <braunr> mach provides aggregate times including destroyed threads though
    <teythoon> ah, I see
    <braunr> one side effect is that you'll see processes sometimes taking 100%
      of cpu time although the cpu is unused
    <braunr> or the cpu time of a process gets reduced :)
    <braunr> i guess the 100% cpu is how top sees a negative increment
    <teythoon> ^^
    <braunr> gg0: do my threadterm packages help with ruby1.9 ?
    <braunr> i mean, can you test with them some time ? :)
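
The aggregate times mentioned here come from the task-level statistics: Mach
folds the run time of already-destroyed threads into the task's basic info,
which is what the tools have to rely on once threads really go away.  A
sketch, assuming the standard `task_info` interface:

    #include <mach.h>
    #include <stdio.h>

    void
    print_task_times (task_t task)
    {
      struct task_basic_info info;
      mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;

      if (task_info (task, TASK_BASIC_INFO, (task_info_t) &info, &count)
          == KERN_SUCCESS)
        /* These totals include threads that have already been destroyed.  */
        printf ("user %d.%06ds system %d.%06ds\n",
                info.user_time.seconds, info.user_time.microseconds,
                info.system_time.seconds, info.system_time.microseconds);
    }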


## IRC, freenode, #hurd, 2013-11-21

    <braunr> youpi: ping about my question regarding error handling in the
      proposed thread_terminate_release call
    <youpi> I agree with what Neal said
    <braunr> he didn't say anything about error handling
    <braunr> see
      http://lists.gnu.org/archive/html/bug-hurd/2013-11/msg00181.html
    <braunr> i think i should make the call fail on first error
    <braunr> it shouldn't happen, so it would merely serve to catch bugs
    <braunr> it's not easily recoverable (if it's recoverable at all)
    <youpi> uh, I thought he had
    <youpi> I must have dreamt

    <braunr> i think i'll go ahead with thread destruction integration


## IRC, freenode, #hurd, 2013-11-25

    <braunr> i've pushed the thread destruction patches for gnumach upstream
    <braunr> and made a branch in glibc for that too
    <teythoon> awesome :)
    <braunr> youpi: i don't remember how glibc changes should be managed
    <braunr> once those are applied, i'll commit in libpthread
    <youpi> braunr: usually we create a topgit branch, and then we add the
      patch from that to the debian repository


## IRC, freenode, #hurd, 2013-11-29

    <braunr> youpi: i still have a leak somewhere with the thread destruction
      patches
    <braunr> maybe on the host priv port in bootstrap servers (root fs and proc
      server)
    <braunr> it prevents priority adjusting in libports and can easily bring
      down a system because servers can start trashing a lot sooner, as it was
      the case during the pthread migration

See discussion about that on [[/open_issues/libpthread]].

    <braunr> so i'll hunt it down before merging


## IRC, freenode, #hurd, 2013-12-19

    <braunr> darnassus still has the libports priority adjustement leaks
    <braunr> i'll apply a few more patches to my hurd packages

    <braunr> humpf, proc seems to have a problem getting the host priv port :/
    <teythoon> thats bad
    <teythoon> what did you do ?
    <braunr> i fixed all the leaks in libports when adjusting priorities
    <braunr> the last one being releasing the host priv right
    <braunr> and i get errors at boot time from the proc server
    <teythoon> remember when i had this problem ?
    <braunr> proc doesn't get the host priv port the normal way since the
      normal way is to get it from proc iirc
    <teythoon> ah, thought you fixed that
    <braunr> so i guess the alternate way doesn't add a reference
    <braunr> well the leak is fixed
    <braunr> the problem you had was due to the leak which made the host priv
      port reach its max uref value
    <braunr> now it's just the proc server
    <braunr> the system works fine though
    <teythoon> for real ?
    <teythoon> the proc server needs the host priv port for getting the new
      tasks
    <braunr> well yes
    <teythoon> how can it work w/o it ?
    <braunr> i don't know ..
    <braunr> i guess the problem is internal to glibc
    <braunr> i mean, get_priv_ports fails, but that doesn't mean the host priv
      port is lost
    <teythoon> could be
    <teythoon> are you running a patched rootfs translator too ?
    <braunr> yes
    <teythoon> ok
    <teythoon> b/c i remember having trouble with that
    <braunr> right, the glibc call would make proc call __proc_getprivports
    <braunr> hum
    <braunr> teythoon: do you remember how proc gets its host priv port ?
    <teythoon> from init
    <teythoon> i think
    <braunr> startup_procinit ?
    <teythoon> possibly
    <braunr> right
    <braunr> so it's probably not the host priv port
    <braunr> i mean, the error is about another invalid send right
    <braunr> hm nope, it is on host_priv :/
    <braunr> hm ok i see, looks like a bug from a debian patch
    <braunr> or rather, a bug fix not yet imported into the debian package
    <braunr> teythoon: you actually fixed it in
      2c9422595f41635e2f4f7ef1afb7eece9001feae
    <braunr> great :)
    <teythoon> ah, that one
    <braunr> i was looking at the upstream code and couldn't understand what
      was going wrong
    <braunr> :)
    <braunr> much better
    <braunr> except ps -eT doesn't work any more ..
    <braunr> interestingly, with the thread destruction patch, ps -eT sometimes
      work, and sometimes doesn't
    <braunr> the behaviour doesn't seem to change without a reboot
    <braunr> and of course, as soon as i say it, i'm proven wrong by the next
      test :)


## IRC, freenode, #hurd, 2013-12-26

    <braunr> __pthread_sigstate_init doesn't seem to be converted to TLS in the
      upstream repository master branch

    <braunr> ah dammit, the global signal dispositions patch touches both glibc
      and libpthread @#!
    <braunr> what a mess

    <braunr> youpi: do you have some time to quickly review the
      rbraun/thread_destruction branch in libpthread ?
    <braunr> there might be conflict with some glibc patches
    <braunr> or do you prefer it on the mailing list ?
    <braunr> (i used a branch because it's not based on master)
    <youpi> rather mail the list, yes
    <braunr> ok
    <youpi> it'd also be useful to write the rationale
    <youpi> probably to be left as comment in the source code
    <braunr> yes, that branch was for personal storage :)
    <youpi> so the reader knows how things are recycled or not
    <braunr> hm
    <braunr> that should already be the case
    <youpi> ok
    <braunr> the two structures that are still recycled are the pthread struct
      and tls
    <braunr> it's quite obvious from pthread_alloc
    <braunr> and well commented there
    <braunr> for tls, it's explained in pthread_exit

    <braunr> there, thread destruction finally merged in
    <braunr> and now, we can remove the ugly hacks that were done for
      threadvars
    <braunr> :)
    <braunr> change stacks at will and support all sorts of weird languages and
      runtimes
    <teythoon> braunr: cool :)


## IRC, freenode, #hurd, 2013-12-31

    <youpi1> braunr: I've added sigstate_locking, sigstate_thread_reference and
      tls_thread_leak to the debian glibc 2.18 package
    <youpi1> I believe that's complete?
    <youpi1> is mach_msg_uspace_options ready for being added? Does it bring
      much speedup?
    <youpi1> AIUI, thread_terminate_release is  the union of the branches
      mentioned above?
    <youpi1> (I'm cleaning up branches in the glibc repo)
    <braunr> youpi1: mach_msg_uspace_options can be left over, it only affects
      selects and not noticeably
    <braunr> yes, those three branches are the only ones needed for thread
      destruction
    <youpi1> ok
    <youpi> does the hurd changes depend on these changes ?
    <braunr> no
    <youpi> good :)
    <braunr> only on tls for one of them
    <braunr> (it's about the default stack size of 64k for hurd servers)
    <youpi> and we have had this in debian for a long time already :)
    <braunr> yes
    <youpi> (how big were they before?)
    <youpi> (were they a couple MiB, and thus exploding to GiBs on thousands
      of threads?)
    <braunr> 64k
    <braunr> pthread stacks are 2M by default
    <braunr> yes


## IRC, freenode, #hurd, 2014-01-14

    <youpi> braunr: it seems your time change in libps made ps produce odd
      results
    <youpi>     samy 10987     5 -514358:-18:-42.17 /hurd/firmlink tmp
    <braunr> youpi: wow :)
    <braunr> that change is supposed to run on a system where threads actually
      get destroyed
    <braunr> but i don't see what could trigger this side effect
    <youpi>     root  8629   664 56 years make -j 3
    <youpi> :)
    <braunr> heh
    <braunr> youpi: does the hurd package on darnassus include that patch ?
    <youpi> yes
    <braunr> i don't reproduce the problem :/
    <youpi> err
    <braunr> what command are you using ?
    <youpi> ps -feM on darnassus
    <youpi>     root 29642   473 7 months /usr/sbin/sshd -R
    <braunr> hmmmm
    <braunr> i don't see it with a make -j
    <youpi> well, it's not systematic
    <youpi> it's like once over two launches
    <braunr> hhhhmmmmm
    <youpi> it'd look like some random numbers get added
    <braunr> strangely, the gcc processes started by a recursive make aren't
      children of make ..
    <braunr> ps -eF hurd seems to report the correct values
    <braunr> even ps -eM
    <braunr> oO
    <braunr> ps -ef too
    <braunr> the problem seems to be with ps -efM
    <youpi> too bad I'm always using that :)
    <braunr> another way to see it is that it makes us spot the issue ;p


### IRC, freenode, #hurd, 2014-01-15

    <braunr> ok i have an idea of what goes wrong in libps

    <braunr> youpi: for some reason, ps -efM lacks the PSTAT_TASK_BASIC flag
    <braunr> my patch is wrong since it doesn't try to determine whether the
      stats apply to a task or a thread, but that is easy to fix
    <braunr> ps -efM should nonetheless provide basic task info, obviously
    <braunr> in addition, the problems i've observed with ps -T (occasional
      segfaults) seem to have existed before thread destruction
    <braunr> they're just strongly exposed now that the thread list can be
      shrunk

    <braunr> libps is quite complicated
    <braunr> even hairy, i'd say ..


### IRC, freenode, #hurd, 2014-01-16

    <braunr> youpi: i think i have a proper fix for libps
    <braunr> i'll commit it soon
    <youpi> ok
    <braunr> basically, getting system times simply set the PSTAT_THREAD_BASIC
      flag
    <braunr> whereas getting the run time of the terminated threads requires
      PSTAT_TASK_BASIC
    <braunr> i assumed it was always set in the function i changed when dealing
      with a task and not a thread
    <braunr> and well, that was a wrong assumption, -M can remove it if not
      strictly needed by the format
    <braunr> the default format asks for suspend_count, which forces the
      retrieval of task basic info, so it works with -eM
    <braunr> but -f doesn't :)
    <youpi> so extremely bad lucky combination of flags :)
    <braunr> indeed
    <braunr> i added a pstat_times using the last (!) available flag bit
    <braunr> looks clean to me
    <braunr> i hope there is no abi issue
    <braunr> (at least everything works with the unmodified ps-hurd executable
      and a new libps.so)

    <braunr> hm, small bug in the thread destruction patch :/


### IRC, freenode, #hurd, 2014-01-17

    <braunr> good, i have proper fixes for tls in the main thread and thread
      termination :)
    <teythoon> awesome :)
    <teythoon> i've been wondering, what does it take to get the thread
      destruction stuff into the debian package ?
    <braunr> i still have to build test packages, look for (unlikely, heh)
      regressions and work some integration details with samuel
    <braunr> hum the main thread tls fixup i guess
    <braunr> youpi was waiting for me to fix that
    <braunr> gnumach already provides the RPC
    <braunr> so it will be in glibc soon
    <braunr> i just have to get those last bits right
    <braunr> teythoon: i'm quite slow at integrating stuff
    <teythoon> and samuel then builds packages ?
    <teythoon> i mean, is our libc package build linked to the other libc
      packages ?
    <braunr> libpthread is applied as a patch to glibc
    <braunr> and loaded as a plugin


## IRC, freenode, #hurd, 2014-01-17

    <braunr> uhm, did we break fakeroot-tcp ?
    <teythoon> we did ?
    <youpi> fakeroot-tcp just works fine on buildds
    <braunr> with fakeroot-tcp, i get
    <braunr> make[4]: Entering directory
      `/home/rbraun/devel/debian/packages/hurd/hurd-0.5.git20140113/libdde-linux26/contrib/include'
    <braunr> rm -f .general.d
    <braunr> make[4]: *** [cleanall] Killed
    <braunr> when cleaning the package before building ..


### IRC, freenode, #hurd, 2014-01-18

    <braunr> damn, fakeroot-tcp won't work on darnassus ..
    <braunr> uh, looks like my tls/thread destruction "fixes" do cause
      regressions :(
    <braunr> fakeroot works fine with debian glibc
    <teythoon> which one ?
    <teythoon> which fakeroot i mean
    <braunr> -tcp
    <braunr> yes, it fails as soon as i use the patched glibc :/
    <braunr> at least it's easy to reproduce


### IRC, freenode, #hurd, 2014-01-20

    <braunr> great, 3rd libc version installed on darnassus, let's see if i can
      build hurd packages against that


### IRC, freenode, #hurd, 2014-01-21

    <braunr> damn, fakeroot-tcp still crashes with my latest changes ....

    <braunr> darnassus looks in good shape
    <braunr> youpi: ^
    <braunr> youpi: if you have other tests, feel free to do them now
    <braunr> i feel confident about committing the changes, if you're ok with
      it
    <youpi> which changes ?
    <youpi> I'm a bit lost in what you were talking about :)
    <braunr> you can find them in 2 patches in /var/tmp on darnassus
    <braunr> one is about fixing thread destruction
    <braunr> i'm pretty certain about this one so i'll commit it directly
    <braunr> the other is fixing the tcb of the main thread

[[open_issues/libpthread]].

    <braunr> where i simply do tcb->self = thread->kernel_thread :)
    <braunr> with a comment explaining why i don't do something else like
      deallocating the unused tcb
    <youpi> braunr: ok, that looks good
    <teythoon> braunr: awesome :)
    <braunr> youpi: ok


### IRC, freenode, #hurd, 2014-01-22

    <braunr> there, libpthread should be fine now


## IRC, freenode, #hurd, 2014-02-06

    <braunr> youpi: in case you're planning to upgrade glibc (or not), the
      thread destruction changes are complete
    <braunr> youpi: darnassus has been running them for some weeks with no
      visible regression
    <youpi> braunr: ok, good
    <youpi> including it in glibc was on my todo list indeed
    <youpi> and Adam  indeed plan for a 2.18 upload
    <braunr> good :)
    <youpi> braunr: this is up to 7c6dc6e28b2fc4b67934223f41cf080ffe58b230,
      right? (Wed Jan 22, Fix up the main thread TCB)
    <braunr> yes
    <braunr> oh, i just saw 2.17-98~0 glibc packages on debian-ports :)
    <youpi> yes, it's just to fix the dhcp crash
    <braunr> ah yes, it's not 2.18
    <youpi> 2.18 is available in experimental

    <youpi> braunr: just to make sure: did you have
      983b18a6ff16f5687a9ece63a50d1831dec88609 in libc on darnassus?
    <youpi> (which drops the stack size  hack)
    <braunr> youpi: let me check
    <braunr> youpi: ah no, i don't, you're right
    <youpi> well, I was just wondering, nothing make me think that was the case
      :)
    <youpi> what was the issue that it was raising btw?
    <braunr> threadvars
    <youpi> ok, b ut in which case?
    <youpi> (to make sure I test that before committing)
    <braunr> now that we switched to tls, i would assume the transition path to
      be 1/ hurd stops defining that symbol, 2/ libpthread can stop using it
    <braunr> the goal was to reduce the stack size of hurd server threads
    <youpi> well, that's not my question :) I'm wondering in which precise case
      that was breaking things
    <braunr> youpi: i don't know, it shouldn't break
    <youpi> ok
    <braunr> youpi: just in case, don't forget that last one line patch i
      committed last night, fakeroot can't work right without it
    <braunr> (i made a minor change while reviewing before comitting, and
      obviously got it wrong :p)
    <youpi> ok

    <youpi> braunr: I've upgraded libpthread in debian's eglibc btw

    <braunr>
      /home/rbraun/devel/debian/packages/eglibc/eglibc-2.17/build-tree/hurd-i386-libc/libc.so.phdr:
      *** executable stack signaled
    <braunr> from build-tree/hurd-i386-libc/elf/check-execstack.out
    <braunr> i thought glibc didn't use those
    <braunr> anyway it doesn't look to be the regression i'm having
    <braunr> does this ring a bell :
    <braunr> Encountered regressions that don't match expected failures
      (debian/testsuite-checking/expected-results-i486-gnu-libc):
    <braunr> test-stpcpy_chk.out, Error 1
    <braunr> TEST test-stpcpy_chk.out: __stpcpy_chk    normal_stpcpy
      simple_stpcpy_chk
    <youpi> nope
    <youpi> after what are you getting this regression?
    <braunr> building glibc 2.17-97 with thread destruction patches, including
      the one removing the stack size hack
    <braunr> during tests
    <braunr> there also are "progressions", but i'm not sure what these are
    <youpi> some progressions are just luck, other seem to happen on some
      platforms only
    <youpi> I'm not sure you want to test 2.17
    <youpi> a lot has changed between 2.17's libpthread and 2.18's libpthread
      (which is now equal to cvs's libpthread
    <youpi> )
    <youpi> s/cvs/git/
    <braunr> yes
    <braunr> i usually build with nocheck


## IRC, freenode, #hurd, 2014-02-07

    <braunr> youpi: on a vm with hurd 1:0.5.git20140203-1, upgrading to a
      patched glibc 2.17-97 that includes the patch which reverts the stack
      size hack, the system reboots and works fine
    <youpi> ok. I don't remember what problem I was seeing
    <braunr> that version of the hurd no longer defines the symbol
    <braunr> but even then, there shouldn't have been any problem
    <braunr> hm, or does it
    <braunr> yes, it does
    <braunr> youpi: the hurd package patch mentions
    <braunr> Revert this for now, will have to wait for dropping the use of
    <braunr> __pthread_stack_default_size from eglibc's
      libpthread_hurd_cond_wait.diff
    <braunr> i wonder how it got there
    <youpi> IIRC I was wondering too
    <braunr> i've installed my c library on darnassus and it works fine there
      too
    <braunr> with older (january) hurd packages
    <braunr> looks good to me


## IRC, freenode, #hurd, 2014-02-10

    <teythoon> braunr: btw, do the new libc packages contain your thread
      destruction work ?
    <braunr> teythoon: the -98 ones on experimental ?
    <braunr> i don't think they do
    <braunr> the -18 ones should do